From 2b717bb195a3034853ed45a52c5752f010e1302b Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Tue, 23 Apr 2024 02:35:25 +0900 Subject: [PATCH 01/58] fix initial corrupt model loop
if for some reason the model loaded during webui's initial loading phase is corrupted, then after entering this state the user will not be able to load another model even if a good one is selected, because the unload_model_weights > send_model_to_cpu > m.lowvram attribute check will fail since m is None; webui will be stuck in the loop, unable to recover without manual intervention
--- modules/sd_models.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/modules/sd_models.py b/modules/sd_models.py index ff245b7a6..1747ca621 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -659,10 +659,11 @@ def get_empty_cond(sd_model): def send_model_to_cpu(m): - if m.lowvram: - lowvram.send_everything_to_cpu() - else: - m.to(devices.cpu) + if m is not None: + if m.lowvram: + lowvram.send_everything_to_cpu() + else: + m.to(devices.cpu) devices.torch_gc()
From 4bc39d234d6535e3d8f8531d0c0f4e049261c922 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Tue, 23 Apr 2024 02:39:45 +0900 Subject: [PATCH 02/58] Show LoRA if model is None
--- .../Lora/ui_extra_networks_lora.py | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-)
diff --git a/extensions-builtin/Lora/ui_extra_networks_lora.py b/extensions-builtin/Lora/ui_extra_networks_lora.py index b627f7dc2..e35d90c6e 100644 --- a/extensions-builtin/Lora/ui_extra_networks_lora.py +++ b/extensions-builtin/Lora/ui_extra_networks_lora.py @@ -60,18 +60,19 @@ class ExtraNetworksPageLora(ui_extra_networks.ExtraNetworksPage): else: sd_version = lora_on_disk.sd_version - if shared.opts.lora_show_all or not enable_filter: - pass - elif sd_version == network.SdVersion.Unknown: - model_version = network.SdVersion.SDXL if shared.sd_model.is_sdxl else network.SdVersion.SD2 if shared.sd_model.is_sd2 else network.SdVersion.SD1 - if model_version.name in shared.opts.lora_hide_unknown_for_versions: + if shared.sd_model is not None: # still show LoRA in case an error occurs during initial model loading + if shared.opts.lora_show_all or not enable_filter: + pass + elif sd_version == network.SdVersion.Unknown: + model_version = network.SdVersion.SDXL if shared.sd_model.is_sdxl else network.SdVersion.SD2 if shared.sd_model.is_sd2 else network.SdVersion.SD1 + if model_version.name in shared.opts.lora_hide_unknown_for_versions: + return None + elif shared.sd_model.is_sdxl and sd_version != network.SdVersion.SDXL: + return None + elif shared.sd_model.is_sd2 and sd_version != network.SdVersion.SD2: + return None + elif shared.sd_model.is_sd1 and sd_version != network.SdVersion.SD1: return None - elif shared.sd_model.is_sdxl and sd_version != network.SdVersion.SDXL: - return None - elif shared.sd_model.is_sd2 and sd_version != network.SdVersion.SD2: - return None - elif shared.sd_model.is_sd1 and sd_version != network.SdVersion.SD1: - return None return item
From a1aa0af8a45f4c30f1d3fce5635c090d64d4e55b Mon Sep 17 00:00:00 2001 From: drhead <1313496+drhead@users.noreply.github.com> Date: Mon, 22 Apr 2024 23:38:44 -0400 Subject: [PATCH 03/58] add code for skipping CFG on early steps
--- modules/sd_samplers_cfg_denoiser.py | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py index 93581c9ac..8ccc837aa 100644 --- 
a/modules/sd_samplers_cfg_denoiser.py +++ b/modules/sd_samplers_cfg_denoiser.py @@ -212,6 +212,11 @@ class CFGDenoiser(torch.nn.Module): uncond = denoiser_params.text_uncond skip_uncond = False + if self.step < shared.opts.skip_cond_steps: + skip_uncond = True + x_in = x_in[:-batch_size] + sigma_in = sigma_in[:-batch_size] + # alternating uncond allows for higher thresholds without the quality loss normally expected from raising it if self.step % 2 and s_min_uncond > 0 and sigma[0] < s_min_uncond and not is_edit_model: skip_uncond = True From 8016d78a4b9c8bdd02b0031694ad56553f89161e Mon Sep 17 00:00:00 2001 From: drhead <1313496+drhead@users.noreply.github.com> Date: Mon, 22 Apr 2024 23:42:24 -0400 Subject: [PATCH 04/58] add option for early cfg skip --- modules/shared_options.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/shared_options.py b/modules/shared_options.py index 326a317e0..2f70ef65a 100644 --- a/modules/shared_options.py +++ b/modules/shared_options.py @@ -380,7 +380,8 @@ options_templates.update(options_section(('sampler-params', "Sampler parameters" 'uni_pc_skip_type': OptionInfo("time_uniform", "UniPC skip type", gr.Radio, {"choices": ["time_uniform", "time_quadratic", "logSNR"]}, infotext='UniPC skip type'), 'uni_pc_order': OptionInfo(3, "UniPC order", gr.Slider, {"minimum": 1, "maximum": 50, "step": 1}, infotext='UniPC order').info("must be < sampling steps"), 'uni_pc_lower_order_final': OptionInfo(True, "UniPC lower order final", infotext='UniPC lower order final'), - 'sd_noise_schedule': OptionInfo("Default", "Noise schedule for sampling", gr.Radio, {"choices": ["Default", "Zero Terminal SNR"]}, infotext="Noise Schedule").info("for use with zero terminal SNR trained models") + 'sd_noise_schedule': OptionInfo("Default", "Noise schedule for sampling", gr.Radio, {"choices": ["Default", "Zero Terminal SNR"]}, infotext="Noise Schedule").info("for use with zero terminal SNR trained models"), + 'skip_cond_steps': OptionInfo(0, "Skip CFG on first N steps of sampling", gr.Slider, {"minimum": 0, "maximum": 50, "step": 1}, infotext="Skip CFG first steps"), })) options_templates.update(options_section(('postprocessing', "Postprocessing", "postprocessing"), { From 83266205d0b55ddbff34ea36b47f69c5ea11cc28 Mon Sep 17 00:00:00 2001 From: drhead <1313496+drhead@users.noreply.github.com> Date: Tue, 23 Apr 2024 00:09:43 -0400 Subject: [PATCH 05/58] Add KL Optimal scheduler --- modules/sd_schedulers.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/modules/sd_schedulers.py b/modules/sd_schedulers.py index 75eb3ac03..10ae4e081 100644 --- a/modules/sd_schedulers.py +++ b/modules/sd_schedulers.py @@ -31,6 +31,15 @@ def sgm_uniform(n, sigma_min, sigma_max, inner_model, device): return torch.FloatTensor(sigs).to(device) +def kl_optimal(n, sigma_min, sigma_max, device): + alpha_min = torch.arctan(torch.tensor(sigma_min, device=device)) + alpha_max = torch.arctan(torch.tensor(sigma_max, device=device)) + sigmas = torch.empty((n+1,), device=device) + for i in range(n+1): + sigmas[i] = torch.tan((i/n) * alpha_min + (1.0-i/n) * alpha_max) + return sigmas + + schedulers = [ Scheduler('automatic', 'Automatic', None), Scheduler('uniform', 'Uniform', uniform, need_inner_model=True), @@ -38,6 +47,7 @@ schedulers = [ Scheduler('exponential', 'Exponential', k_diffusion.sampling.get_sigmas_exponential), Scheduler('polyexponential', 'Polyexponential', k_diffusion.sampling.get_sigmas_polyexponential, default_rho=1.0), Scheduler('sgm_uniform', 'SGM Uniform', 
sgm_uniform, need_inner_model=True, aliases=["SGMUniform"]), + Scheduler('kl_optimal', 'KL Optimal', kl_optimal), ] schedulers_map = {**{x.name: x for x in schedulers}, **{x.label: x for x in schedulers}} From 83182d2799f12ee2b5e5425d750db062ad67eb90 Mon Sep 17 00:00:00 2001 From: drhead <1313496+drhead@users.noreply.github.com> Date: Tue, 23 Apr 2024 03:07:25 -0400 Subject: [PATCH 06/58] change skip early cond option name and to float --- modules/shared_options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/shared_options.py b/modules/shared_options.py index 2f70ef65a..91ba72b5e 100644 --- a/modules/shared_options.py +++ b/modules/shared_options.py @@ -381,7 +381,7 @@ options_templates.update(options_section(('sampler-params', "Sampler parameters" 'uni_pc_order': OptionInfo(3, "UniPC order", gr.Slider, {"minimum": 1, "maximum": 50, "step": 1}, infotext='UniPC order').info("must be < sampling steps"), 'uni_pc_lower_order_final': OptionInfo(True, "UniPC lower order final", infotext='UniPC lower order final'), 'sd_noise_schedule': OptionInfo("Default", "Noise schedule for sampling", gr.Radio, {"choices": ["Default", "Zero Terminal SNR"]}, infotext="Noise Schedule").info("for use with zero terminal SNR trained models"), - 'skip_cond_steps': OptionInfo(0, "Skip CFG on first N steps of sampling", gr.Slider, {"minimum": 0, "maximum": 50, "step": 1}, infotext="Skip CFG first steps"), + 'skip_early_cond': OptionInfo(0, "Skip CFG during early sampling", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}, infotext="Skip Early CFG").info("CFG will be disabled (set to 1) on early steps, can both improve sample diversity/quality and speed up sampling"), })) options_templates.update(options_section(('postprocessing', "Postprocessing", "postprocessing"), { From 6e9b69a33853e1bcee81cea6f01cf13de612fef7 Mon Sep 17 00:00:00 2001 From: drhead <1313496+drhead@users.noreply.github.com> Date: Tue, 23 Apr 2024 03:08:28 -0400 Subject: [PATCH 07/58] change skip_early_cond code to use float --- modules/sd_samplers_cfg_denoiser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py index 8ccc837aa..fba5c48c0 100644 --- a/modules/sd_samplers_cfg_denoiser.py +++ b/modules/sd_samplers_cfg_denoiser.py @@ -212,7 +212,7 @@ class CFGDenoiser(torch.nn.Module): uncond = denoiser_params.text_uncond skip_uncond = False - if self.step < shared.opts.skip_cond_steps: + if self.step / self.total_steps <= shared.opts.skip_early_cond: skip_uncond = True x_in = x_in[:-batch_size] sigma_in = sigma_in[:-batch_size] From 33cbbf9f8b46666a2325c98b723b6cb2ec192ef7 Mon Sep 17 00:00:00 2001 From: drhead <1313496+drhead@users.noreply.github.com> Date: Tue, 23 Apr 2024 03:15:00 -0400 Subject: [PATCH 08/58] add s_min_uncond_all option --- modules/shared_options.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/shared_options.py b/modules/shared_options.py index 91ba72b5e..c711fa5f6 100644 --- a/modules/shared_options.py +++ b/modules/shared_options.py @@ -210,6 +210,7 @@ options_templates.update(options_section(('img2img', "img2img", "sd"), { options_templates.update(options_section(('optimizations', "Optimizations", "sd"), { "cross_attention_optimization": OptionInfo("Automatic", "Cross attention optimization", gr.Dropdown, lambda: {"choices": shared_items.cross_attention_optimizations()}), "s_min_uncond": OptionInfo(0.0, "Negative Guidance minimum sigma", gr.Slider, {"minimum": 0.0, "maximum": 15.0, "step": 
0.01}).link("PR", "https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/9177").info("skip negative prompt for some steps when the image is almost ready; 0=disable, higher=faster"), + "s_min_uncond_all": OptionInfo(False, "NGMS: Skip every step").info("makes Negative Guidance minimum sigma skip negative guidance on every step instead of only half"), "token_merging_ratio": OptionInfo(0.0, "Token merging ratio", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}, infotext='Token merging ratio').link("PR", "https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/9256").info("0=disable, higher=faster"), "token_merging_ratio_img2img": OptionInfo(0.0, "Token merging ratio for img2img", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}).info("only applies if non-zero and overrides above"), "token_merging_ratio_hr": OptionInfo(0.0, "Token merging ratio for high-res pass", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}, infotext='Token merging ratio hr').info("only applies if non-zero and overrides above"), From 029adbe5318b57c04dbc0d92273cce38e1ecf457 Mon Sep 17 00:00:00 2001 From: drhead <1313496+drhead@users.noreply.github.com> Date: Tue, 23 Apr 2024 03:15:56 -0400 Subject: [PATCH 09/58] implement option to skip uncond on all steps below ngms --- modules/sd_samplers_cfg_denoiser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py index fba5c48c0..082a4f63c 100644 --- a/modules/sd_samplers_cfg_denoiser.py +++ b/modules/sd_samplers_cfg_denoiser.py @@ -218,7 +218,7 @@ class CFGDenoiser(torch.nn.Module): sigma_in = sigma_in[:-batch_size] # alternating uncond allows for higher thresholds without the quality loss normally expected from raising it - if self.step % 2 and s_min_uncond > 0 and sigma[0] < s_min_uncond and not is_edit_model: + if (self.step % 2 or shared.opts.s_min_uncond_all) and s_min_uncond > 0 and sigma[0] < s_min_uncond and not is_edit_model: skip_uncond = True x_in = x_in[:-batch_size] sigma_in = sigma_in[:-batch_size] From 3a215deff23d28c06c8de98423c12628b8ce6326 Mon Sep 17 00:00:00 2001 From: drhead <1313496+drhead@users.noreply.github.com> Date: Sun, 28 Apr 2024 00:15:58 -0400 Subject: [PATCH 10/58] vectorize kl-optimal sigma calculation Co-authored-by: mamei16 --- modules/sd_schedulers.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/modules/sd_schedulers.py b/modules/sd_schedulers.py index 10ae4e081..99a6f7be2 100644 --- a/modules/sd_schedulers.py +++ b/modules/sd_schedulers.py @@ -34,9 +34,8 @@ def sgm_uniform(n, sigma_min, sigma_max, inner_model, device): def kl_optimal(n, sigma_min, sigma_max, device): alpha_min = torch.arctan(torch.tensor(sigma_min, device=device)) alpha_max = torch.arctan(torch.tensor(sigma_max, device=device)) - sigmas = torch.empty((n+1,), device=device) - for i in range(n+1): - sigmas[i] = torch.tan((i/n) * alpha_min + (1.0-i/n) * alpha_max) + step_indices = torch.arange(n + 1, device=device) + sigmas = torch.tan(step_indices / n * alpha_min + (1.0 - step_indices / n) * alpha_max) return sigmas From 579f1ef278080ff7545be3a42c5fe36fc2890887 Mon Sep 17 00:00:00 2001 From: missionfloyd Date: Sun, 28 Apr 2024 22:36:43 -0600 Subject: [PATCH 11/58] Allow old sampler names in API --- modules/api/api.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/modules/api/api.py b/modules/api/api.py index f468c3852..b1201fe77 100644 --- a/modules/api/api.py +++ b/modules/api/api.py @@ 
-48,6 +48,15 @@ def validate_sampler_name(name): return name +def parse_old_sampler_name(name): + for scheduler in sd_schedulers.schedulers: + for scheduler_name in [scheduler.label, scheduler.name, *(scheduler.aliases or [])]: + if name.endswith(" " + scheduler_name): + return name[0:-(len(scheduler_name) + 1)], scheduler_name + + return name, "Automatic" + + def setUpscalers(req: dict): reqDict = vars(req) reqDict['extras_upscaler_1'] = reqDict.pop('upscaler_1', None) @@ -438,15 +447,19 @@ class Api: self.apply_infotext(txt2imgreq, "txt2img", script_runner=script_runner, mentioned_script_args=infotext_script_args) selectable_scripts, selectable_script_idx = self.get_selectable_script(txt2imgreq.script_name, script_runner) + sampler, scheduler = parse_old_sampler_name(txt2imgreq.sampler_name or txt2imgreq.sampler_index) populate = txt2imgreq.copy(update={ # Override __init__ params - "sampler_name": validate_sampler_name(txt2imgreq.sampler_name or txt2imgreq.sampler_index), + "sampler_name": validate_sampler_name(sampler), "do_not_save_samples": not txt2imgreq.save_images, "do_not_save_grid": not txt2imgreq.save_images, }) if populate.sampler_name: populate.sampler_index = None # prevent a warning later on + if not populate.scheduler: + populate.scheduler = scheduler + args = vars(populate) args.pop('script_name', None) args.pop('script_args', None) # will refeed them to the pipeline directly after initializing them @@ -502,9 +515,10 @@ class Api: self.apply_infotext(img2imgreq, "img2img", script_runner=script_runner, mentioned_script_args=infotext_script_args) selectable_scripts, selectable_script_idx = self.get_selectable_script(img2imgreq.script_name, script_runner) + sampler, scheduler = parse_old_sampler_name(img2imgreq.sampler_name or img2imgreq.sampler_index) populate = img2imgreq.copy(update={ # Override __init__ params - "sampler_name": validate_sampler_name(img2imgreq.sampler_name or img2imgreq.sampler_index), + "sampler_name": validate_sampler_name(sampler), "do_not_save_samples": not img2imgreq.save_images, "do_not_save_grid": not img2imgreq.save_images, "mask": mask, @@ -512,6 +526,9 @@ class Api: if populate.sampler_name: populate.sampler_index = None # prevent a warning later on + if not populate.scheduler: + populate.scheduler = scheduler + args = vars(populate) args.pop('include_init_images', None) # this is meant to be done by "exclude": True in model, but it's for a reason that I cannot determine. 
args.pop('script_name', None) From c8336c45b98c2226923503e17b1d7f9170af0f8a Mon Sep 17 00:00:00 2001 From: missionfloyd Date: Tue, 30 Apr 2024 01:53:41 -0600 Subject: [PATCH 12/58] Use existing function for old sampler names --- modules/api/api.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/modules/api/api.py b/modules/api/api.py index b1201fe77..d8e54529b 100644 --- a/modules/api/api.py +++ b/modules/api/api.py @@ -48,15 +48,6 @@ def validate_sampler_name(name): return name -def parse_old_sampler_name(name): - for scheduler in sd_schedulers.schedulers: - for scheduler_name in [scheduler.label, scheduler.name, *(scheduler.aliases or [])]: - if name.endswith(" " + scheduler_name): - return name[0:-(len(scheduler_name) + 1)], scheduler_name - - return name, "Automatic" - - def setUpscalers(req: dict): reqDict = vars(req) reqDict['extras_upscaler_1'] = reqDict.pop('upscaler_1', None) @@ -447,7 +438,7 @@ class Api: self.apply_infotext(txt2imgreq, "txt2img", script_runner=script_runner, mentioned_script_args=infotext_script_args) selectable_scripts, selectable_script_idx = self.get_selectable_script(txt2imgreq.script_name, script_runner) - sampler, scheduler = parse_old_sampler_name(txt2imgreq.sampler_name or txt2imgreq.sampler_index) + sampler, scheduler = sd_samplers.get_sampler_and_scheduler(txt2imgreq.sampler_name or txt2imgreq.sampler_index, txt2imgreq.scheduler) populate = txt2imgreq.copy(update={ # Override __init__ params "sampler_name": validate_sampler_name(sampler), @@ -457,7 +448,7 @@ class Api: if populate.sampler_name: populate.sampler_index = None # prevent a warning later on - if not populate.scheduler: + if not populate.scheduler and scheduler != "Automatic": populate.scheduler = scheduler args = vars(populate) @@ -515,7 +506,7 @@ class Api: self.apply_infotext(img2imgreq, "img2img", script_runner=script_runner, mentioned_script_args=infotext_script_args) selectable_scripts, selectable_script_idx = self.get_selectable_script(img2imgreq.script_name, script_runner) - sampler, scheduler = parse_old_sampler_name(img2imgreq.sampler_name or img2imgreq.sampler_index) + sampler, scheduler = sd_samplers.get_sampler_and_scheduler(img2imgreq.sampler_name or img2imgreq.sampler_index, img2imgreq.scheduler) populate = img2imgreq.copy(update={ # Override __init__ params "sampler_name": validate_sampler_name(sampler), @@ -526,7 +517,7 @@ class Api: if populate.sampler_name: populate.sampler_index = None # prevent a warning later on - if not populate.scheduler: + if not populate.scheduler and scheduler != "Automatic": populate.scheduler = scheduler args = vars(populate) From 5d5224b322e8dbd817469a32d6c5578faff2df2f Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Thu, 2 May 2024 02:25:16 +0900 Subject: [PATCH 13/58] fix_p_invalid_sampler_and_scheduler --- modules/processing.py | 3 +++ modules/sd_samplers.py | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/modules/processing.py b/modules/processing.py index 76557dd7f..cb646e2bf 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -842,6 +842,9 @@ def process_images(p: StableDiffusionProcessing) -> Processed: sd_models.apply_token_merging(p.sd_model, p.get_token_merging_ratio()) + # backwards compatibility, fix sampler and scheduler if invalid + sd_samplers.fix_p_invalid_sampler_and_scheduler(p) + res = process_images_inner(p) finally: diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py index 6b7b84b6d..b8abac4a9 100644 
--- a/modules/sd_samplers.py +++ b/modules/sd_samplers.py @@ -1,7 +1,7 @@ from __future__ import annotations import functools - +import logging from modules import sd_samplers_kdiffusion, sd_samplers_timesteps, sd_samplers_lcm, shared, sd_samplers_common, sd_schedulers # imports for functions that previously were here and are used by other modules @@ -122,4 +122,11 @@ def get_sampler_and_scheduler(sampler_name, scheduler_name): return sampler.name, found_scheduler.label +def fix_p_invalid_sampler_and_scheduler(p): + i_sampler_name, i_scheduler = p.sampler_name, p.scheduler + p.sampler_name, p.scheduler = get_sampler_and_scheduler(p.sampler_name, p.scheduler) + if p.sampler_name != i_sampler_name or i_scheduler != p.scheduler: + logging.warning(f'Sampler Scheduler autocorrection: "{i_sampler_name}" -> "{p.sampler_name}", "{i_scheduler}" -> "{p.scheduler}"') + + set_samplers() From 73d1caf8f28a387f2db5a77a8892edad8ed505a0 Mon Sep 17 00:00:00 2001 From: Logan Date: Fri, 10 May 2024 12:38:10 +1000 Subject: [PATCH 14/58] Add Align Your Steps to available schedulers * Include both SDXL and SD 1.5 variants (https://research.nvidia.com/labs/toronto-ai/AlignYourSteps/howto.html) --- modules/sd_schedulers.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/modules/sd_schedulers.py b/modules/sd_schedulers.py index 75eb3ac03..2131eae46 100644 --- a/modules/sd_schedulers.py +++ b/modules/sd_schedulers.py @@ -4,6 +4,7 @@ import torch import k_diffusion +import numpy as np @dataclasses.dataclass class Scheduler: @@ -30,6 +31,35 @@ def sgm_uniform(n, sigma_min, sigma_max, inner_model, device): sigs += [0.0] return torch.FloatTensor(sigs).to(device) +def get_align_your_steps_sigmas(n, device, sigma_id): + # https://research.nvidia.com/labs/toronto-ai/AlignYourSteps/howto.html + def loglinear_interp(t_steps, num_steps): + """ + Performs log-linear interpolation of a given array of decreasing numbers. 
+ """ + xs = np.linspace(0, 1, len(t_steps)) + ys = np.log(t_steps[::-1]) + + new_xs = np.linspace(0, 1, num_steps) + new_ys = np.interp(new_xs, xs, ys) + + interped_ys = np.exp(new_ys)[::-1].copy() + return interped_ys + + if sigma_id == "sdxl": + sigmas = [14.615, 6.315, 3.771, 2.181, 1.342, 0.862, 0.555, 0.380, 0.234, 0.113, 0.029] + elif sigma_id == "sd15": + sigmas = [14.615, 6.475, 3.861, 2.697, 1.886, 1.396, 0.963, 0.652, 0.399, 0.152, 0.029] + else: + print(f'Align Your Steps sigma identifier "{sigma_id}" not recognized, defaulting to SD 1.5.') + sigmas = [14.615, 6.475, 3.861, 2.697, 1.886, 1.396, 0.963, 0.652, 0.399, 0.152, 0.029] + + if n != len(sigmas): + sigmas = np.append(loglinear_interp(sigmas, n), [0.0]) + else: + sigmas.append(0.0) + + return torch.FloatTensor(sigmas).to(device) schedulers = [ Scheduler('automatic', 'Automatic', None), @@ -38,6 +68,8 @@ schedulers = [ Scheduler('exponential', 'Exponential', k_diffusion.sampling.get_sigmas_exponential), Scheduler('polyexponential', 'Polyexponential', k_diffusion.sampling.get_sigmas_polyexponential, default_rho=1.0), Scheduler('sgm_uniform', 'SGM Uniform', sgm_uniform, need_inner_model=True, aliases=["SGMUniform"]), + Scheduler('align_your_steps_sdxl', 'Align Your Steps (SDXL)', lambda n, sigma_min, sigma_max, device: get_align_your_steps_sigmas(n, device, "sdxl")), + Scheduler('align_your_steps_sd15', 'Align Your Steps (SD 1.5)', lambda n, sigma_min, sigma_max, device: get_align_your_steps_sigmas(n, device, "sd15")), ] schedulers_map = {**{x.name: x for x in schedulers}, **{x.label: x for x in schedulers}} From d6b4444069d36cf7554eb9932061ecf43e9b1335 Mon Sep 17 00:00:00 2001 From: Logan Date: Fri, 10 May 2024 18:05:45 +1000 Subject: [PATCH 15/58] Use shared.sd_model.is_sdxl to determine base AYS sigmas --- modules/sd_schedulers.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/modules/sd_schedulers.py b/modules/sd_schedulers.py index 2131eae46..0ac1f7a21 100644 --- a/modules/sd_schedulers.py +++ b/modules/sd_schedulers.py @@ -6,6 +6,8 @@ import k_diffusion import numpy as np +from modules import shared + @dataclasses.dataclass class Scheduler: name: str @@ -31,7 +33,7 @@ def sgm_uniform(n, sigma_min, sigma_max, inner_model, device): sigs += [0.0] return torch.FloatTensor(sigs).to(device) -def get_align_your_steps_sigmas(n, device, sigma_id): +def get_align_your_steps_sigmas(n, sigma_min, sigma_max, device): # https://research.nvidia.com/labs/toronto-ai/AlignYourSteps/howto.html def loglinear_interp(t_steps, num_steps): """ @@ -46,12 +48,10 @@ def get_align_your_steps_sigmas(n, device, sigma_id): interped_ys = np.exp(new_ys)[::-1].copy() return interped_ys - if sigma_id == "sdxl": + if shared.sd_model.is_sdxl: sigmas = [14.615, 6.315, 3.771, 2.181, 1.342, 0.862, 0.555, 0.380, 0.234, 0.113, 0.029] - elif sigma_id == "sd15": - sigmas = [14.615, 6.475, 3.861, 2.697, 1.886, 1.396, 0.963, 0.652, 0.399, 0.152, 0.029] else: - print(f'Align Your Steps sigma identifier "{sigma_id}" not recognized, defaulting to SD 1.5.') + # Default to SD 1.5 sigmas. 
sigmas = [14.615, 6.475, 3.861, 2.697, 1.886, 1.396, 0.963, 0.652, 0.399, 0.152, 0.029] if n != len(sigmas): @@ -68,8 +68,7 @@ schedulers = [ Scheduler('exponential', 'Exponential', k_diffusion.sampling.get_sigmas_exponential), Scheduler('polyexponential', 'Polyexponential', k_diffusion.sampling.get_sigmas_polyexponential, default_rho=1.0), Scheduler('sgm_uniform', 'SGM Uniform', sgm_uniform, need_inner_model=True, aliases=["SGMUniform"]), - Scheduler('align_your_steps_sdxl', 'Align Your Steps (SDXL)', lambda n, sigma_min, sigma_max, device: get_align_your_steps_sigmas(n, device, "sdxl")), - Scheduler('align_your_steps_sd15', 'Align Your Steps (SD 1.5)', lambda n, sigma_min, sigma_max, device: get_align_your_steps_sigmas(n, device, "sd15")), + Scheduler('align_your_steps', 'Align Your Steps', get_align_your_steps_sigmas), ] schedulers_map = {**{x.name: x for x in schedulers}, **{x.label: x for x in schedulers}}
From 5ab7d08a0a99c88a60a13885e564fd7d2d05cfc1 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Wed, 15 May 2024 17:27:05 +0900 Subject: [PATCH 16/58] fix extension update when not on main branch
--- modules/extensions.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/modules/extensions.py b/modules/extensions.py index 5ad934b4d..24de766eb 100644 --- a/modules/extensions.py +++ b/modules/extensions.py @@ -191,8 +191,9 @@ class Extension: def check_updates(self): repo = Repo(self.path) + branch_name = f'{repo.remote().name}/{self.branch}' for fetch in repo.remote().fetch(dry_run=True): - if self.branch and fetch.name != f'{repo.remote().name}/{self.branch}': + if self.branch and fetch.name != branch_name: continue if fetch.flags != fetch.HEAD_UPTODATE: self.can_update = True @@ -200,7 +201,7 @@ class Extension: return try: - origin = repo.rev_parse('origin') + origin = repo.rev_parse(branch_name) if repo.head.commit != origin: self.can_update = True self.status = "behind HEAD" @@ -213,8 +214,10 @@ class Extension: self.can_update = False self.status = "latest" - def fetch_and_reset_hard(self, commit='origin'): + def fetch_and_reset_hard(self, commit=None): repo = Repo(self.path) + if commit is None: + commit = f'{repo.remote().name}/{self.branch}' # Fix: `error: Your local changes to the following files would be overwritten by merge`, # because WSL2 Docker set 755 file permissions instead of 644, this results to the error. 
repo.git.fetch(all=True) From 022d835565f253841f7f9272ba320bb0cec4770d Mon Sep 17 00:00:00 2001 From: huchenlei Date: Wed, 15 May 2024 15:20:40 -0400 Subject: [PATCH 17/58] use_checkpoint = False --- configs/alt-diffusion-inference.yaml | 2 +- configs/alt-diffusion-m18-inference.yaml | 2 +- configs/instruct-pix2pix.yaml | 2 +- configs/sd_xl_inpaint.yaml | 2 +- configs/v1-inference.yaml | 2 +- configs/v1-inpainting-inference.yaml | 2 +- modules/sd_hijack_checkpoint.py | 9 ++++++--- modules/sd_models_config.py | 2 +- 8 files changed, 13 insertions(+), 10 deletions(-) diff --git a/configs/alt-diffusion-inference.yaml b/configs/alt-diffusion-inference.yaml index cfbee72d7..4944ab5c8 100644 --- a/configs/alt-diffusion-inference.yaml +++ b/configs/alt-diffusion-inference.yaml @@ -40,7 +40,7 @@ model: use_spatial_transformer: True transformer_depth: 1 context_dim: 768 - use_checkpoint: True + use_checkpoint: False legacy: False first_stage_config: diff --git a/configs/alt-diffusion-m18-inference.yaml b/configs/alt-diffusion-m18-inference.yaml index 41a031d55..c60dca8c7 100644 --- a/configs/alt-diffusion-m18-inference.yaml +++ b/configs/alt-diffusion-m18-inference.yaml @@ -41,7 +41,7 @@ model: use_linear_in_transformer: True transformer_depth: 1 context_dim: 1024 - use_checkpoint: True + use_checkpoint: False legacy: False first_stage_config: diff --git a/configs/instruct-pix2pix.yaml b/configs/instruct-pix2pix.yaml index 4e896879d..564e50ae2 100644 --- a/configs/instruct-pix2pix.yaml +++ b/configs/instruct-pix2pix.yaml @@ -45,7 +45,7 @@ model: use_spatial_transformer: True transformer_depth: 1 context_dim: 768 - use_checkpoint: True + use_checkpoint: False legacy: False first_stage_config: diff --git a/configs/sd_xl_inpaint.yaml b/configs/sd_xl_inpaint.yaml index 3bad37218..f40f45e33 100644 --- a/configs/sd_xl_inpaint.yaml +++ b/configs/sd_xl_inpaint.yaml @@ -21,7 +21,7 @@ model: params: adm_in_channels: 2816 num_classes: sequential - use_checkpoint: True + use_checkpoint: False in_channels: 9 out_channels: 4 model_channels: 320 diff --git a/configs/v1-inference.yaml b/configs/v1-inference.yaml index d4effe569..25c4d9ed0 100644 --- a/configs/v1-inference.yaml +++ b/configs/v1-inference.yaml @@ -40,7 +40,7 @@ model: use_spatial_transformer: True transformer_depth: 1 context_dim: 768 - use_checkpoint: True + use_checkpoint: False legacy: False first_stage_config: diff --git a/configs/v1-inpainting-inference.yaml b/configs/v1-inpainting-inference.yaml index f9eec37d2..68c199f99 100644 --- a/configs/v1-inpainting-inference.yaml +++ b/configs/v1-inpainting-inference.yaml @@ -40,7 +40,7 @@ model: use_spatial_transformer: True transformer_depth: 1 context_dim: 768 - use_checkpoint: True + use_checkpoint: False legacy: False first_stage_config: diff --git a/modules/sd_hijack_checkpoint.py b/modules/sd_hijack_checkpoint.py index 2604d969f..b2f05bbdc 100644 --- a/modules/sd_hijack_checkpoint.py +++ b/modules/sd_hijack_checkpoint.py @@ -4,16 +4,19 @@ import ldm.modules.attention import ldm.modules.diffusionmodules.openaimodel +# Setting flag=False so that torch skips checking parameters. +# parameters checking is expensive in frequent operations. 
+ def BasicTransformerBlock_forward(self, x, context=None): - return checkpoint(self._forward, x, context) + return checkpoint(self._forward, x, context, flag=False) def AttentionBlock_forward(self, x): - return checkpoint(self._forward, x) + return checkpoint(self._forward, x, flag=False) def ResBlock_forward(self, x, emb): - return checkpoint(self._forward, x, emb) + return checkpoint(self._forward, x, emb, flag=False) stored = [] diff --git a/modules/sd_models_config.py b/modules/sd_models_config.py index b38137eb5..9cec4f13d 100644 --- a/modules/sd_models_config.py +++ b/modules/sd_models_config.py @@ -35,7 +35,7 @@ def is_using_v_parameterization_for_sd2(state_dict): with sd_disable_initialization.DisableInitialization(): unet = ldm.modules.diffusionmodules.openaimodel.UNetModel( - use_checkpoint=True, + use_checkpoint=False, use_fp16=False, image_size=32, in_channels=4, From 0e98529365477a4f240b2ac67d94ff59235144c5 Mon Sep 17 00:00:00 2001 From: huchenlei Date: Wed, 15 May 2024 15:46:53 -0400 Subject: [PATCH 18/58] Replace einops.rearrange with torch native --- modules/sd_hijack_optimizations.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py index 7f9e328d0..4c2dc56d4 100644 --- a/modules/sd_hijack_optimizations.py +++ b/modules/sd_hijack_optimizations.py @@ -486,7 +486,19 @@ def xformers_attention_forward(self, x, context=None, mask=None, **kwargs): k_in = self.to_k(context_k) v_in = self.to_v(context_v) - q, k, v = (rearrange(t, 'b n (h d) -> b n h d', h=h) for t in (q_in, k_in, v_in)) + def _reshape(t): + """rearrange(t, 'b n (h d) -> b n h d', h=h). + Using torch native operations to avoid overhead as this function is + called frequently. (70 times/it for SDXL) + """ + b, n, _ = t.shape # Get the batch size (b) and sequence length (n) + d = t.shape[2] // h # Determine the depth per head + return t.reshape(b, n, h, d) + + q = _reshape(q_in) + k = _reshape(k_in) + v = _reshape(v_in) + del q_in, k_in, v_in dtype = q.dtype @@ -497,7 +509,9 @@ def xformers_attention_forward(self, x, context=None, mask=None, **kwargs): out = out.to(dtype) - out = rearrange(out, 'b n h d -> b n (h d)', h=h) + # out = rearrange(out, 'b n h d -> b n (h d)', h=h) + b, n, h, d = out.shape + out = out.reshape(b, n, h * d) return self.to_out(out) From 9eb2f786316c0f7e94c3df5f5e8bda203e6b875d Mon Sep 17 00:00:00 2001 From: huchenlei Date: Wed, 15 May 2024 16:32:29 -0400 Subject: [PATCH 19/58] Precompute is_sdxl_inpaint flag --- modules/processing.py | 28 +++++++++++----------------- modules/sd_models.py | 7 +++++++ modules/sd_models_xl.py | 9 ++++----- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/modules/processing.py b/modules/processing.py index 76557dd7f..d82cb24fb 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -115,20 +115,17 @@ def txt2img_image_conditioning(sd_model, x, width, height): return x.new_zeros(x.shape[0], 2*sd_model.noise_augmentor.time_embed.dim, dtype=x.dtype, device=x.device) else: - sd = sd_model.model.state_dict() - diffusion_model_input = sd.get('diffusion_model.input_blocks.0.0.weight', None) - if diffusion_model_input is not None: - if diffusion_model_input.shape[1] == 9: - # The "masked-image" in this case will just be all 0.5 since the entire image is masked. 
- image_conditioning = torch.ones(x.shape[0], 3, height, width, device=x.device) * 0.5 - image_conditioning = images_tensor_to_samples(image_conditioning, - approximation_indexes.get(opts.sd_vae_encode_method)) + if sd_model.model.is_sdxl_inpaint: + # The "masked-image" in this case will just be all 0.5 since the entire image is masked. + image_conditioning = torch.ones(x.shape[0], 3, height, width, device=x.device) * 0.5 + image_conditioning = images_tensor_to_samples(image_conditioning, + approximation_indexes.get(opts.sd_vae_encode_method)) - # Add the fake full 1s mask to the first dimension. - image_conditioning = torch.nn.functional.pad(image_conditioning, (0, 0, 0, 0, 1, 0), value=1.0) - image_conditioning = image_conditioning.to(x.dtype) + # Add the fake full 1s mask to the first dimension. + image_conditioning = torch.nn.functional.pad(image_conditioning, (0, 0, 0, 0, 1, 0), value=1.0) + image_conditioning = image_conditioning.to(x.dtype) - return image_conditioning + return image_conditioning # Dummy zero conditioning if we're not using inpainting or unclip models. # Still takes up a bit of memory, but no encoder call. @@ -390,11 +387,8 @@ class StableDiffusionProcessing: if self.sampler.conditioning_key == "crossattn-adm": return self.unclip_image_conditioning(source_image) - sd = self.sampler.model_wrap.inner_model.model.state_dict() - diffusion_model_input = sd.get('diffusion_model.input_blocks.0.0.weight', None) - if diffusion_model_input is not None: - if diffusion_model_input.shape[1] == 9: - return self.inpainting_image_conditioning(source_image, latent_image, image_mask=image_mask) + if self.sampler.model_wrap.inner_model.model.is_sdxl_inpaint: + return self.inpainting_image_conditioning(source_image, latent_image, image_mask=image_mask) # Dummy zero conditioning if we're not using inpainting or depth model. return latent_image.new_zeros(latent_image.shape[0], 5, 1, 1) diff --git a/modules/sd_models.py b/modules/sd_models.py index ff245b7a6..62e74d27a 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -380,6 +380,13 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer model.is_sd2 = not model.is_sdxl and hasattr(model.cond_stage_model, 'model') model.is_sd1 = not model.is_sdxl and not model.is_sd2 model.is_ssd = model.is_sdxl and 'model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_q.weight' not in state_dict.keys() + # Set is_sdxl_inpaint flag. + diffusion_model_input = state_dict.get('diffusion_model.input_blocks.0.0.weight', None) + model.is_sdxl_inpaint = ( + model.is_sdxl and + diffusion_model_input is not None and + diffusion_model_input.shape[1] == 9 + ) if model.is_sdxl: sd_models_xl.extend_sdxl(model) diff --git a/modules/sd_models_xl.py b/modules/sd_models_xl.py index 94ff973fb..35e21f6e4 100644 --- a/modules/sd_models_xl.py +++ b/modules/sd_models_xl.py @@ -35,11 +35,10 @@ def get_learned_conditioning(self: sgm.models.diffusion.DiffusionEngine, batch: def apply_model(self: sgm.models.diffusion.DiffusionEngine, x, t, cond): - sd = self.model.state_dict() - diffusion_model_input = sd.get('diffusion_model.input_blocks.0.0.weight', None) - if diffusion_model_input is not None: - if diffusion_model_input.shape[1] == 9: - x = torch.cat([x] + cond['c_concat'], dim=1) + """WARNING: This function is called once per denoising iteration. DO NOT add + expensive functionc calls such as `model.state_dict`. 
""" + if self.model.is_sdxl_inpaint: + x = torch.cat([x] + cond['c_concat'], dim=1) return self.model(x, t, cond) From 6a48476502d6cdd19cb3d0c7f2a0b92aacd7c01f Mon Sep 17 00:00:00 2001 From: huchenlei Date: Wed, 15 May 2024 16:54:26 -0400 Subject: [PATCH 20/58] Fix flag check for SD15 --- modules/processing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/processing.py b/modules/processing.py index d82cb24fb..fff2595e7 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -115,7 +115,7 @@ def txt2img_image_conditioning(sd_model, x, width, height): return x.new_zeros(x.shape[0], 2*sd_model.noise_augmentor.time_embed.dim, dtype=x.dtype, device=x.device) else: - if sd_model.model.is_sdxl_inpaint: + if getattr(sd_model.model, "is_sdxl_inpaint", False): # The "masked-image" in this case will just be all 0.5 since the entire image is masked. image_conditioning = torch.ones(x.shape[0], 3, height, width, device=x.device) * 0.5 image_conditioning = images_tensor_to_samples(image_conditioning, @@ -387,7 +387,7 @@ class StableDiffusionProcessing: if self.sampler.conditioning_key == "crossattn-adm": return self.unclip_image_conditioning(source_image) - if self.sampler.model_wrap.inner_model.model.is_sdxl_inpaint: + if getattr(self.sampler.model_wrap.inner_model.model, "is_sdxl_inpaint", False): return self.inpainting_image_conditioning(source_image, latent_image, image_mask=image_mask) # Dummy zero conditioning if we're not using inpainting or depth model. From 3e20b36e8f1b26f24db0c149732fb5479bff68bc Mon Sep 17 00:00:00 2001 From: huchenlei Date: Wed, 15 May 2024 17:27:01 -0400 Subject: [PATCH 21/58] Fix attr access --- modules/sd_models_xl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/sd_models_xl.py b/modules/sd_models_xl.py index 35e21f6e4..1242a5936 100644 --- a/modules/sd_models_xl.py +++ b/modules/sd_models_xl.py @@ -37,7 +37,7 @@ def get_learned_conditioning(self: sgm.models.diffusion.DiffusionEngine, batch: def apply_model(self: sgm.models.diffusion.DiffusionEngine, x, t, cond): """WARNING: This function is called once per denoising iteration. DO NOT add expensive functionc calls such as `model.state_dict`. 
""" - if self.model.is_sdxl_inpaint: + if self.is_sdxl_inpaint: x = torch.cat([x] + cond['c_concat'], dim=1) return self.model(x, t, cond) From 9c8075ba8e538f695ef25f85e6513227b58b71ce Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Thu, 16 May 2024 23:16:50 +0900 Subject: [PATCH 22/58] torch_utils.float64 return torch.float64 if device is not mps or xpu, else return torch.float32 --- modules/torch_utils.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/modules/torch_utils.py b/modules/torch_utils.py index e5b52393e..a07e02853 100644 --- a/modules/torch_utils.py +++ b/modules/torch_utils.py @@ -1,6 +1,7 @@ from __future__ import annotations import torch.nn +import torch def get_param(model) -> torch.nn.Parameter: @@ -15,3 +16,11 @@ def get_param(model) -> torch.nn.Parameter: return param raise ValueError(f"No parameters found in model {model!r}") + + +def float64(t: torch.Tensor): + """return torch.float64 if device is not mps or xpu, else return torch.float32""" + match t.device.type: + case 'mps', 'xpu': + return torch.float32 + return torch.float64 From 41f66849c7feac1efd0b9eb6884209be382e9e74 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Thu, 16 May 2024 23:18:20 +0900 Subject: [PATCH 23/58] mps, xpu compatibility --- .../soft-inpainting/scripts/soft_inpainting.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/extensions-builtin/soft-inpainting/scripts/soft_inpainting.py b/extensions-builtin/soft-inpainting/scripts/soft_inpainting.py index f56e1e226..0e629963a 100644 --- a/extensions-builtin/soft-inpainting/scripts/soft_inpainting.py +++ b/extensions-builtin/soft-inpainting/scripts/soft_inpainting.py @@ -3,6 +3,7 @@ import gradio as gr import math from modules.ui_components import InputAccordion import modules.scripts as scripts +from modules.torch_utils import float64 class SoftInpaintingSettings: @@ -79,13 +80,11 @@ def latent_blend(settings, a, b, t): # Calculate the magnitude of the interpolated vectors. (We will remove this magnitude.) # 64-bit operations are used here to allow large exponents. - current_magnitude = torch.norm(image_interp, p=2, dim=1, keepdim=True).to(torch.float64).add_(0.00001) + current_magnitude = torch.norm(image_interp, p=2, dim=1, keepdim=True).to(float64(image_interp)).add_(0.00001) # Interpolate the powered magnitudes, then un-power them (bring them back to a power of 1). 
- a_magnitude = torch.norm(a, p=2, dim=1, keepdim=True).to(torch.float64).pow_( - settings.inpaint_detail_preservation) * one_minus_t3 - b_magnitude = torch.norm(b, p=2, dim=1, keepdim=True).to(torch.float64).pow_( - settings.inpaint_detail_preservation) * t3 + a_magnitude = torch.norm(a, p=2, dim=1, keepdim=True).to(float64(a)).pow_(settings.inpaint_detail_preservation) * one_minus_t3 + b_magnitude = torch.norm(b, p=2, dim=1, keepdim=True).to(float64(b)).pow_(settings.inpaint_detail_preservation) * t3 desired_magnitude = a_magnitude desired_magnitude.add_(b_magnitude).pow_(1 / settings.inpaint_detail_preservation) del a_magnitude, b_magnitude, t3, one_minus_t3
From f015b94176d6df372ce153eddc018cb3b08c03ba Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Thu, 16 May 2024 23:19:06 +0900 Subject: [PATCH 24/58] use torch_utils.float64
--- modules/sd_samplers_timesteps_impl.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/modules/sd_samplers_timesteps_impl.py b/modules/sd_samplers_timesteps_impl.py index 930a64af5..84867d6ee 100644 --- a/modules/sd_samplers_timesteps_impl.py +++ b/modules/sd_samplers_timesteps_impl.py @@ -5,13 +5,14 @@ import numpy as np from modules import shared from modules.models.diffusion.uni_pc import uni_pc +from modules.torch_utils import float64 @torch.no_grad() def ddim(model, x, timesteps, extra_args=None, callback=None, disable=None, eta=0.0): alphas_cumprod = model.inner_model.inner_model.alphas_cumprod alphas = alphas_cumprod[timesteps] - alphas_prev = alphas_cumprod[torch.nn.functional.pad(timesteps[:-1], pad=(1, 0))].to(torch.float64 if x.device.type != 'mps' and x.device.type != 'xpu' else torch.float32) + alphas_prev = alphas_cumprod[torch.nn.functional.pad(timesteps[:-1], pad=(1, 0))].to(float64(x)) sqrt_one_minus_alphas = torch.sqrt(1 - alphas) sigmas = eta * np.sqrt((1 - alphas_prev.cpu().numpy()) / (1 - alphas.cpu()) * (1 - alphas.cpu() / alphas_prev.cpu().numpy())) @@ -43,7 +44,7 @@ def ddim(model, x, timesteps, extra_args=None, callback=None, disable=None, eta= def plms(model, x, timesteps, extra_args=None, callback=None, disable=None): alphas_cumprod = model.inner_model.inner_model.alphas_cumprod alphas = alphas_cumprod[timesteps] - alphas_prev = alphas_cumprod[torch.nn.functional.pad(timesteps[:-1], pad=(1, 0))].to(torch.float64 if x.device.type != 'mps' and x.device.type != 'xpu' else torch.float32) + alphas_prev = alphas_cumprod[torch.nn.functional.pad(timesteps[:-1], pad=(1, 0))].to(float64(x)) sqrt_one_minus_alphas = torch.sqrt(1 - alphas) extra_args = {} if extra_args is None else extra_args
From 51b13a8c54854104f1510956b920399226a932f1 Mon Sep 17 00:00:00 2001 From: huchenlei Date: Thu, 16 May 2024 11:39:01 -0400 Subject: [PATCH 25/58] Prevent unnecessary bias backup
--- extensions-builtin/Lora/networks.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py index 42b14dc23..360455f87 100644 --- a/extensions-builtin/Lora/networks.py +++ b/extensions-builtin/Lora/networks.py @@ -378,7 +378,10 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn self.network_weights_backup = weights_backup bias_backup = getattr(self, "network_bias_backup", None) - if bias_backup is None: + if bias_backup is None and wanted_names != (): + if current_names != (): + raise RuntimeError("no backup bias found and current bias are not unchanged") + if isinstance(self, 
torch.nn.MultiheadAttention) and self.out_proj.bias is not None: bias_backup = self.out_proj.bias.to(devices.cpu, copy=True) elif getattr(self, 'bias', None) is not None: From b2ae4490b9c225ff020941bcbf36c8975760deba Mon Sep 17 00:00:00 2001 From: huchenlei Date: Thu, 16 May 2024 14:45:00 -0400 Subject: [PATCH 26/58] Fix LoRA bias error --- extensions-builtin/Lora/networks.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py index 360455f87..aee4e9d9c 100644 --- a/extensions-builtin/Lora/networks.py +++ b/extensions-builtin/Lora/networks.py @@ -379,15 +379,17 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn bias_backup = getattr(self, "network_bias_backup", None) if bias_backup is None and wanted_names != (): - if current_names != (): - raise RuntimeError("no backup bias found and current bias are not unchanged") - if isinstance(self, torch.nn.MultiheadAttention) and self.out_proj.bias is not None: bias_backup = self.out_proj.bias.to(devices.cpu, copy=True) elif getattr(self, 'bias', None) is not None: bias_backup = self.bias.to(devices.cpu, copy=True) else: bias_backup = None + + # Unlike weight which always has value, some modules don't have bias. + # Only report if bias is not None and current bias are not unchanged. + if bias_backup is not None and current_names != (): + raise RuntimeError("no backup bias found and current bias are not unchanged") self.network_bias_backup = bias_backup if current_names != wanted_names: From 221ac0b9abd2e39ccc6f1969a434f05dcd72b29a Mon Sep 17 00:00:00 2001 From: Andray Date: Thu, 16 May 2024 23:08:24 +0400 Subject: [PATCH 27/58] img2img batch upload method --- modules/img2img.py | 20 +++++++++++++++----- modules/ui.py | 31 ++++++++++++++++++++----------- 2 files changed, 35 insertions(+), 16 deletions(-) diff --git a/modules/img2img.py b/modules/img2img.py index a1d042c21..24f869f5c 100644 --- a/modules/img2img.py +++ b/modules/img2img.py @@ -17,11 +17,14 @@ from modules.ui import plaintext_to_html import modules.scripts -def process_batch(p, input_dir, output_dir, inpaint_mask_dir, args, to_scale=False, scale_by=1.0, use_png_info=False, png_info_props=None, png_info_dir=None): +def process_batch(p, input, output_dir, inpaint_mask_dir, args, to_scale=False, scale_by=1.0, use_png_info=False, png_info_props=None, png_info_dir=None): output_dir = output_dir.strip() processing.fix_seed(p) - batch_images = list(shared.walk_files(input_dir, allowed_extensions=(".png", ".jpg", ".jpeg", ".webp", ".tif", ".tiff"))) + if isinstance(input, str): + batch_images = list(shared.walk_files(input, allowed_extensions=(".png", ".jpg", ".jpeg", ".webp", ".tif", ".tiff"))) + else: + batch_images = [os.path.abspath(x.name) for x in input] is_inpaint_batch = False if inpaint_mask_dir: @@ -146,7 +149,7 @@ def process_batch(p, input_dir, output_dir, inpaint_mask_dir, args, to_scale=Fal return batch_results -def img2img(id_task: str, request: gr.Request, mode: int, prompt: str, negative_prompt: str, prompt_styles, init_img, sketch, init_img_with_mask, inpaint_color_sketch, inpaint_color_sketch_orig, init_img_inpaint, init_mask_inpaint, mask_blur: int, mask_alpha: float, inpainting_fill: int, n_iter: int, batch_size: int, cfg_scale: float, image_cfg_scale: float, denoising_strength: float, selected_scale_tab: int, height: int, width: int, scale_by: float, resize_mode: int, inpaint_full_res: bool, inpaint_full_res_padding: int, 
inpainting_mask_invert: int, img2img_batch_input_dir: str, img2img_batch_output_dir: str, img2img_batch_inpaint_mask_dir: str, override_settings_texts, img2img_batch_use_png_info: bool, img2img_batch_png_info_props: list, img2img_batch_png_info_dir: str, *args): +def img2img(id_task: str, request: gr.Request, mode: int, prompt: str, negative_prompt: str, prompt_styles, init_img, sketch, init_img_with_mask, inpaint_color_sketch, inpaint_color_sketch_orig, init_img_inpaint, init_mask_inpaint, mask_blur: int, mask_alpha: float, inpainting_fill: int, n_iter: int, batch_size: int, cfg_scale: float, image_cfg_scale: float, denoising_strength: float, selected_scale_tab: int, height: int, width: int, scale_by: float, resize_mode: int, inpaint_full_res: bool, inpaint_full_res_padding: int, inpainting_mask_invert: int, img2img_batch_input_dir: str, img2img_batch_output_dir: str, img2img_batch_inpaint_mask_dir: str, override_settings_texts, img2img_batch_use_png_info: bool, img2img_batch_png_info_props: list, img2img_batch_png_info_dir: str, img2img_batch_source_type: str, img2img_batch_upload: list, *args): override_settings = create_override_settings_dict(override_settings_texts) is_batch = mode == 5 @@ -221,8 +224,15 @@ def img2img(id_task: str, request: gr.Request, mode: int, prompt: str, negative_ with closing(p): if is_batch: - assert not shared.cmd_opts.hide_ui_dir_config, "Launched with --hide-ui-dir-config, batch img2img disabled" - processed = process_batch(p, img2img_batch_input_dir, img2img_batch_output_dir, img2img_batch_inpaint_mask_dir, args, to_scale=selected_scale_tab == 1, scale_by=scale_by, use_png_info=img2img_batch_use_png_info, png_info_props=img2img_batch_png_info_props, png_info_dir=img2img_batch_png_info_dir) + if img2img_batch_source_type == "upload": + assert isinstance(img2img_batch_upload, list) and img2img_batch_upload + output_dir = "" + inpaint_mask_dir = "" + png_info_dir = img2img_batch_png_info_dir if not shared.cmd_opts.hide_ui_dir_config else "" + processed = process_batch(p, img2img_batch_upload, output_dir, inpaint_mask_dir, args, to_scale=selected_scale_tab == 1, scale_by=scale_by, use_png_info=img2img_batch_use_png_info, png_info_props=img2img_batch_png_info_props, png_info_dir=png_info_dir) + else: # "from dir" + assert not shared.cmd_opts.hide_ui_dir_config, "Launched with --hide-ui-dir-config, batch img2img disabled" + processed = process_batch(p, img2img_batch_input_dir, img2img_batch_output_dir, img2img_batch_inpaint_mask_dir, args, to_scale=selected_scale_tab == 1, scale_by=scale_by, use_png_info=img2img_batch_use_png_info, png_info_props=img2img_batch_png_info_props, png_info_dir=img2img_batch_png_info_dir) if processed is None: processed = Processed(p, [], p.seed, "") diff --git a/modules/ui.py b/modules/ui.py index 403425f29..f3ac42367 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -566,18 +566,25 @@ def create_ui(): init_mask_inpaint = gr.Image(label="Mask", source="upload", interactive=True, type="pil", image_mode="RGBA", elem_id="img_inpaint_mask") with gr.TabItem('Batch', id='batch', elem_id="img2img_batch_tab") as tab_batch: - hidden = '
<br>Disabled when launched with --hide-ui-dir-config.' if shared.cmd_opts.hide_ui_dir_config else '' - gr.HTML( - "<p style='padding-bottom: 1em;'>Process images in a directory on the same machine where the server is running." + - "<br>Use an empty output directory to save pictures normally instead of writing to the output directory." + - f"<br>Add inpaint batch mask directory to enable inpaint batch processing." f"{hidden}</p>" ) - img2img_batch_input_dir = gr.Textbox(label="Input directory", **shared.hide_dirs, elem_id="img2img_batch_input_dir") - img2img_batch_output_dir = gr.Textbox(label="Output directory", **shared.hide_dirs, elem_id="img2img_batch_output_dir") - img2img_batch_inpaint_mask_dir = gr.Textbox(label="Inpaint batch mask directory (required for inpaint batch processing only)", **shared.hide_dirs, elem_id="img2img_batch_inpaint_mask_dir") + with gr.Tabs(elem_id="img2img_batch_source"): + img2img_batch_source_type = gr.Textbox(visible=False, value="upload") + with gr.TabItem('Upload', id='batch_upload', elem_id="img2img_batch_upload_tab") as tab_batch_upload: + img2img_batch_upload = gr.Files(label="Files", interactive=True, elem_id="img2img_batch_upload") + with gr.TabItem('From directory', id='batch_from_dir', elem_id="img2img_batch_from_dir_tab") as tab_batch_from_dir: + hidden = '<br>Disabled when launched with --hide-ui-dir-config.' if shared.cmd_opts.hide_ui_dir_config else '' + gr.HTML( + "<p style='padding-bottom: 1em;'>Process images in a directory on the same machine where the server is running." + + "<br>Use an empty output directory to save pictures normally instead of writing to the output directory." + + f"<br>Add inpaint batch mask directory to enable inpaint batch processing." f"{hidden}</p>
" + ) + img2img_batch_input_dir = gr.Textbox(label="Input directory", **shared.hide_dirs, elem_id="img2img_batch_input_dir") + img2img_batch_output_dir = gr.Textbox(label="Output directory", **shared.hide_dirs, elem_id="img2img_batch_output_dir") + img2img_batch_inpaint_mask_dir = gr.Textbox(label="Inpaint batch mask directory (required for inpaint batch processing only)", **shared.hide_dirs, elem_id="img2img_batch_inpaint_mask_dir") + tab_batch_upload.select(fn=lambda: "upload", inputs=[], outputs=[img2img_batch_source_type]) + tab_batch_from_dir.select(fn=lambda: "from dir", inputs=[], outputs=[img2img_batch_source_type]) with gr.Accordion("PNG info", open=False): - img2img_batch_use_png_info = gr.Checkbox(label="Append png info to prompts", **shared.hide_dirs, elem_id="img2img_batch_use_png_info") + img2img_batch_use_png_info = gr.Checkbox(label="Append png info to prompts", elem_id="img2img_batch_use_png_info") img2img_batch_png_info_dir = gr.Textbox(label="PNG info directory", **shared.hide_dirs, placeholder="Leave empty to use input directory", elem_id="img2img_batch_png_info_dir") img2img_batch_png_info_props = gr.CheckboxGroup(["Prompt", "Negative prompt", "Seed", "CFG scale", "Sampler", "Steps", "Model hash"], label="Parameters to take from png info", info="Prompts from png info will be appended to prompts set in ui.") @@ -759,6 +766,8 @@ def create_ui(): img2img_batch_use_png_info, img2img_batch_png_info_props, img2img_batch_png_info_dir, + img2img_batch_source_type, + img2img_batch_upload, ] + custom_inputs, outputs=[ output_panel.gallery, From 58eec83a546b8d61500c7b801cb0bdbe7650f6a6 Mon Sep 17 00:00:00 2001 From: huchenlei Date: Thu, 16 May 2024 16:39:02 -0400 Subject: [PATCH 28/58] Fully prevent use_checkpoint --- modules/sd_models.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/modules/sd_models.py b/modules/sd_models.py index ff245b7a6..a33fa7c33 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -551,6 +551,11 @@ def repair_config(sd_config): karlo_path = os.path.join(paths.models_path, 'karlo') sd_config.model.params.noise_aug_config.params.clip_stats_path = sd_config.model.params.noise_aug_config.params.clip_stats_path.replace("checkpoints/karlo_models", karlo_path) + # Do not use checkpoint for inference. + # This helps prevent extra performance overhead on checking parameters. + # The perf overhead is about 100ms/it on 4090. 
+ sd_config.model.params.network_config.params.use_checkpoint = False + def rescale_zero_terminal_snr_abar(alphas_cumprod): alphas_bar_sqrt = alphas_cumprod.sqrt() From 2a8a60c2c50473f0ece5804d4a2cde0d1ff3d35e Mon Sep 17 00:00:00 2001 From: huchenlei Date: Thu, 16 May 2024 19:50:06 -0400 Subject: [PATCH 29/58] Add --precision half cmd option --- modules/cmd_args.py | 2 +- modules/devices.py | 24 ++++++++++++++++++++++++ modules/sd_hijack_unet.py | 29 ++++++++++++++++++++++------- modules/sd_hijack_utils.py | 26 +++++++++++++++----------- modules/sd_models.py | 1 + modules/shared_init.py | 8 ++++++++ 6 files changed, 71 insertions(+), 19 deletions(-) diff --git a/modules/cmd_args.py b/modules/cmd_args.py index 016a33d10..58c5e5d5b 100644 --- a/modules/cmd_args.py +++ b/modules/cmd_args.py @@ -41,7 +41,7 @@ parser.add_argument("--lowvram", action='store_true', help="enable stable diffus parser.add_argument("--lowram", action='store_true', help="load stable diffusion checkpoint weights to VRAM instead of RAM") parser.add_argument("--always-batch-cond-uncond", action='store_true', help="does not do anything") parser.add_argument("--unload-gfpgan", action='store_true', help="does not do anything.") -parser.add_argument("--precision", type=str, help="evaluate at this precision", choices=["full", "autocast"], default="autocast") +parser.add_argument("--precision", type=str, help="evaluate at this precision", choices=["full", "half", "autocast"], default="autocast") parser.add_argument("--upcast-sampling", action='store_true', help="upcast sampling. No effect with --no-half. Usually produces similar results to --no-half with better performance while using less memory.") parser.add_argument("--share", action='store_true', help="use share=True for gradio and make the UI accessible through their site") parser.add_argument("--ngrok", type=str, help="ngrok authtoken, alternative to gradio --share", default=None) diff --git a/modules/devices.py b/modules/devices.py index e4f671ac6..7de34ac51 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -114,6 +114,9 @@ errors.run(enable_tf32, "Enabling TF32") cpu: torch.device = torch.device("cpu") fp8: bool = False +# Force fp16 for all models in inference. No casting during inference. +# This flag is controlled by "--precision half" command line arg. +force_fp16: bool = False device: torch.device = None device_interrogate: torch.device = None device_gfpgan: torch.device = None @@ -127,6 +130,8 @@ unet_needs_upcast = False def cond_cast_unet(input): + if force_fp16: + return input.to(torch.float16) return input.to(dtype_unet) if unet_needs_upcast else input @@ -206,6 +211,11 @@ def autocast(disable=False): if disable: return contextlib.nullcontext() + if force_fp16: + # No casting during inference if force_fp16 is enabled. + # All tensor dtype conversion happens before inference. + return contextlib.nullcontext() + if fp8 and device==cpu: return torch.autocast("cpu", dtype=torch.bfloat16, enabled=True) @@ -269,3 +279,17 @@ def first_time_calculation(): x = torch.zeros((1, 1, 3, 3)).to(device, dtype) conv2d = torch.nn.Conv2d(1, 1, (3, 3)).to(device, dtype) conv2d(x) + + +def force_model_fp16(): + """ + ldm and sgm has modules.diffusionmodules.util.GroupNorm32.forward, which + force conversion of input to float32. If force_fp16 is enabled, we need to + prevent this casting. 
+ """ + assert force_fp16 + import sgm.modules.diffusionmodules.util as sgm_util + import ldm.modules.diffusionmodules.util as ldm_util + sgm_util.GroupNorm32 = torch.nn.GroupNorm + ldm_util.GroupNorm32 = torch.nn.GroupNorm + print("ldm/sgm GroupNorm32 replaced with normal torch.nn.GroupNorm due to `--precision half`.") diff --git a/modules/sd_hijack_unet.py b/modules/sd_hijack_unet.py index 2101f1a04..41955313a 100644 --- a/modules/sd_hijack_unet.py +++ b/modules/sd_hijack_unet.py @@ -36,7 +36,7 @@ th = TorchHijackForUnet() # Below are monkey patches to enable upcasting a float16 UNet for float32 sampling def apply_model(orig_func, self, x_noisy, t, cond, **kwargs): - + """Always make sure inputs to unet are in correct dtype.""" if isinstance(cond, dict): for y in cond.keys(): if isinstance(cond[y], list): @@ -45,7 +45,11 @@ def apply_model(orig_func, self, x_noisy, t, cond, **kwargs): cond[y] = cond[y].to(devices.dtype_unet) if isinstance(cond[y], torch.Tensor) else cond[y] with devices.autocast(): - return orig_func(self, x_noisy.to(devices.dtype_unet), t.to(devices.dtype_unet), cond, **kwargs).float() + result = orig_func(self, x_noisy.to(devices.dtype_unet), t.to(devices.dtype_unet), cond, **kwargs) + if devices.unet_needs_upcast: + return result.float() + else: + return result class GELUHijack(torch.nn.GELU, torch.nn.Module): @@ -64,12 +68,11 @@ def hijack_ddpm_edit(): if not ddpm_edit_hijack: CondFunc('modules.models.diffusion.ddpm_edit.LatentDiffusion.decode_first_stage', first_stage_sub, first_stage_cond) CondFunc('modules.models.diffusion.ddpm_edit.LatentDiffusion.encode_first_stage', first_stage_sub, first_stage_cond) - ddpm_edit_hijack = CondFunc('modules.models.diffusion.ddpm_edit.LatentDiffusion.apply_model', apply_model, unet_needs_upcast) + ddpm_edit_hijack = CondFunc('modules.models.diffusion.ddpm_edit.LatentDiffusion.apply_model', apply_model) unet_needs_upcast = lambda *args, **kwargs: devices.unet_needs_upcast -CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.apply_model', apply_model, unet_needs_upcast) -CondFunc('ldm.modules.diffusionmodules.openaimodel.timestep_embedding', lambda orig_func, timesteps, *args, **kwargs: orig_func(timesteps, *args, **kwargs).to(torch.float32 if timesteps.dtype == torch.int64 else devices.dtype_unet), unet_needs_upcast) + if version.parse(torch.__version__) <= version.parse("1.13.2") or torch.cuda.is_available(): CondFunc('ldm.modules.diffusionmodules.util.GroupNorm32.forward', lambda orig_func, self, *args, **kwargs: orig_func(self.float(), *args, **kwargs), unet_needs_upcast) CondFunc('ldm.modules.attention.GEGLU.forward', lambda orig_func, self, x: orig_func(self.float(), x.float()).to(devices.dtype_unet), unet_needs_upcast) @@ -81,5 +84,17 @@ CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.decode_first_stage', first_s CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.encode_first_stage', first_stage_sub, first_stage_cond) CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.get_first_stage_encoding', lambda orig_func, *args, **kwargs: orig_func(*args, **kwargs).float(), first_stage_cond) -CondFunc('sgm.modules.diffusionmodules.wrappers.OpenAIWrapper.forward', apply_model, unet_needs_upcast) -CondFunc('sgm.modules.diffusionmodules.openaimodel.timestep_embedding', lambda orig_func, timesteps, *args, **kwargs: orig_func(timesteps, *args, **kwargs).to(torch.float32 if timesteps.dtype == torch.int64 else devices.dtype_unet), unet_needs_upcast) +CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.apply_model', apply_model) 
+CondFunc('sgm.modules.diffusionmodules.wrappers.OpenAIWrapper.forward', apply_model) + + +def timestep_embedding_cast_result(orig_func, timesteps, *args, **kwargs): + if devices.unet_needs_upcast and timesteps.dtype == torch.int64: + dtype = torch.float32 + else: + dtype = devices.dtype_unet + return orig_func(timesteps, *args, **kwargs).to(dtype=dtype) + + +CondFunc('ldm.modules.diffusionmodules.openaimodel.timestep_embedding', timestep_embedding_cast_result) +CondFunc('sgm.modules.diffusionmodules.openaimodel.timestep_embedding', timestep_embedding_cast_result) diff --git a/modules/sd_hijack_utils.py b/modules/sd_hijack_utils.py index 79bf6e468..546f2eda4 100644 --- a/modules/sd_hijack_utils.py +++ b/modules/sd_hijack_utils.py @@ -1,7 +1,11 @@ import importlib + +always_true_func = lambda *args, **kwargs: True + + class CondFunc: - def __new__(cls, orig_func, sub_func, cond_func): + def __new__(cls, orig_func, sub_func, cond_func=always_true_func): self = super(CondFunc, cls).__new__(cls) if isinstance(orig_func, str): func_path = orig_func.split('.') @@ -20,13 +24,13 @@ class CondFunc: print(f"Warning: Failed to resolve {orig_func} for CondFunc hijack") pass self.__init__(orig_func, sub_func, cond_func) - return lambda *args, **kwargs: self(*args, **kwargs) - def __init__(self, orig_func, sub_func, cond_func): - self.__orig_func = orig_func - self.__sub_func = sub_func - self.__cond_func = cond_func - def __call__(self, *args, **kwargs): - if not self.__cond_func or self.__cond_func(self.__orig_func, *args, **kwargs): - return self.__sub_func(self.__orig_func, *args, **kwargs) - else: - return self.__orig_func(*args, **kwargs) + return lambda *args, **kwargs: self(*args, **kwargs) + def __init__(self, orig_func, sub_func, cond_func): + self.__orig_func = orig_func + self.__sub_func = sub_func + self.__cond_func = cond_func + def __call__(self, *args, **kwargs): + if not self.__cond_func or self.__cond_func(self.__orig_func, *args, **kwargs): + return self.__sub_func(self.__orig_func, *args, **kwargs) + else: + return self.__orig_func(*args, **kwargs) diff --git a/modules/sd_models.py b/modules/sd_models.py index ff245b7a6..9c5909168 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -403,6 +403,7 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer model.float() model.alphas_cumprod_original = model.alphas_cumprod devices.dtype_unet = torch.float32 + assert shared.cmd_opts.precision != "half", "Cannot use --precision half with --no-half" timer.record("apply float()") else: vae = model.first_stage_model diff --git a/modules/shared_init.py b/modules/shared_init.py index 935e3a21c..a6ad0433d 100644 --- a/modules/shared_init.py +++ b/modules/shared_init.py @@ -31,6 +31,14 @@ def initialize(): devices.dtype_vae = torch.float32 if cmd_opts.no_half or cmd_opts.no_half_vae else torch.float16 devices.dtype_inference = torch.float32 if cmd_opts.precision == 'full' else devices.dtype + if cmd_opts.precision == "half": + msg = "--no-half and --no-half-vae conflict with --precision half" + assert devices.dtype == torch.float16, msg + assert devices.dtype_vae == torch.float16, msg + assert devices.dtype_inference == torch.float16, msg + devices.force_fp16 = True + devices.force_model_fp16() + shared.device = devices.device shared.weight_load_location = None if cmd_opts.lowram else "cpu" From 47f1d42a7e77259e2e7418ae8f941718c55cfd25 Mon Sep 17 00:00:00 2001 From: huchenlei Date: Thu, 16 May 2024 20:06:04 -0400 Subject: [PATCH 30/58] Fix for SD15 models --- 
modules/sd_models.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/modules/sd_models.py b/modules/sd_models.py index a33fa7c33..cda142bdd 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -553,8 +553,11 @@ def repair_config(sd_config): # Do not use checkpoint for inference. # This helps prevent extra performance overhead on checking parameters. - # The perf overhead is about 100ms/it on 4090. - sd_config.model.params.network_config.params.use_checkpoint = False + # The perf overhead is about 100ms/it on 4090 for SDXL. + if hasattr(sd_config.model.params, "network_config"): + sd_config.model.params.network_config.params.use_checkpoint = False + if hasattr(sd_config.model.params, "unet_config"): + sd_config.model.params.unet_config.params.use_checkpoint = False def rescale_zero_terminal_snr_abar(alphas_cumprod): From 10f2407f48fa3a8bbd299068e5f67108f272b87d Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Sat, 18 May 2024 00:44:02 +0900 Subject: [PATCH 31/58] xyz csv skipinitialspace --- scripts/xyz_grid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py index b9fd66fe5..d416e4c07 100644 --- a/scripts/xyz_grid.py +++ b/scripts/xyz_grid.py @@ -212,7 +212,7 @@ def list_to_csv_string(data_list): def csv_string_to_list_strip(data_str): - return list(map(str.strip, chain.from_iterable(csv.reader(StringIO(data_str))))) + return list(map(str.strip, chain.from_iterable(csv.reader(StringIO(data_str), skipinitialspace=True)))) class AxisOption: From 53d67088ee0fb190c3ae1330c2b876dedb16dd8b Mon Sep 17 00:00:00 2001 From: drhead <1313496+drhead@users.noreply.github.com> Date: Fri, 17 May 2024 12:12:57 -0400 Subject: [PATCH 32/58] Patch timestep embedding to create tensor on-device --- modules/sd_hijack_unet.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/modules/sd_hijack_unet.py b/modules/sd_hijack_unet.py index 2101f1a04..0dabbe0e4 100644 --- a/modules/sd_hijack_unet.py +++ b/modules/sd_hijack_unet.py @@ -1,5 +1,7 @@ import torch from packaging import version +from einops import repeat +import math from modules import devices from modules.sd_hijack_utils import CondFunc @@ -48,6 +50,30 @@ def apply_model(orig_func, self, x_noisy, t, cond, **kwargs): return orig_func(self, x_noisy.to(devices.dtype_unet), t.to(devices.dtype_unet), cond, **kwargs).float() +# Monkey patch to create timestep embed tensor on device, avoiding a block. +def timestep_embedding(_, timesteps, dim, max_period=10000, repeat_only=False): + """ + Create sinusoidal timestep embeddings. + :param timesteps: a 1-D Tensor of N indices, one per batch element. + These may be fractional. + :param dim: the dimension of the output. + :param max_period: controls the minimum frequency of the embeddings. + :return: an [N x dim] Tensor of positional embeddings. 
+ """ + if not repeat_only: + half = dim // 2 + freqs = torch.exp( + -math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32, device=timesteps.device) / half + ) + args = timesteps[:, None].float() * freqs[None] + embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) + if dim % 2: + embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1) + else: + embedding = repeat(timesteps, 'b -> b d', d=dim) + return embedding + + class GELUHijack(torch.nn.GELU, torch.nn.Module): def __init__(self, *args, **kwargs): torch.nn.GELU.__init__(self, *args, **kwargs) @@ -69,6 +95,7 @@ def hijack_ddpm_edit(): unet_needs_upcast = lambda *args, **kwargs: devices.unet_needs_upcast CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.apply_model', apply_model, unet_needs_upcast) +CondFunc('ldm.modules.diffusionmodules.openaimodel.timestep_embedding', timestep_embedding) CondFunc('ldm.modules.diffusionmodules.openaimodel.timestep_embedding', lambda orig_func, timesteps, *args, **kwargs: orig_func(timesteps, *args, **kwargs).to(torch.float32 if timesteps.dtype == torch.int64 else devices.dtype_unet), unet_needs_upcast) if version.parse(torch.__version__) <= version.parse("1.13.2") or torch.cuda.is_available(): CondFunc('ldm.modules.diffusionmodules.util.GroupNorm32.forward', lambda orig_func, self, *args, **kwargs: orig_func(self.float(), *args, **kwargs), unet_needs_upcast) From cc9ca67664ef72931af9a4dced88a8434c5d4f16 Mon Sep 17 00:00:00 2001 From: drhead <1313496+drhead@users.noreply.github.com> Date: Fri, 17 May 2024 13:14:26 -0400 Subject: [PATCH 33/58] Add transformer forward patch --- modules/sd_hijack_unet.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/modules/sd_hijack_unet.py b/modules/sd_hijack_unet.py index 0dabbe0e4..c680367eb 100644 --- a/modules/sd_hijack_unet.py +++ b/modules/sd_hijack_unet.py @@ -74,6 +74,30 @@ def timestep_embedding(_, timesteps, dim, max_period=10000, repeat_only=False): return embedding +# Monkey patch to SpatialTransformer removing unnecessary contiguous calls. 
+# Prevents a lot of unnecessary aten::copy_ calls +def spatial_transformer_forward(_, self, x: torch.Tensor, context=None): + # note: if no context is given, cross-attention defaults to self-attention + if not isinstance(context, list): + context = [context] + b, c, h, w = x.shape + x_in = x + x = self.norm(x) + if not self.use_linear: + x = self.proj_in(x) + x = x.permute(0, 2, 3, 1).reshape(b, h * w, c) + if self.use_linear: + x = self.proj_in(x) + for i, block in enumerate(self.transformer_blocks): + x = block(x, context=context[i]) + if self.use_linear: + x = self.proj_out(x) + x = x.view(b, h, w, c).permute(0, 3, 1, 2) + if not self.use_linear: + x = self.proj_out(x) + return x + x_in + + class GELUHijack(torch.nn.GELU, torch.nn.Module): def __init__(self, *args, **kwargs): torch.nn.GELU.__init__(self, *args, **kwargs) @@ -95,7 +119,8 @@ def hijack_ddpm_edit(): unet_needs_upcast = lambda *args, **kwargs: devices.unet_needs_upcast CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.apply_model', apply_model, unet_needs_upcast) -CondFunc('ldm.modules.diffusionmodules.openaimodel.timestep_embedding', timestep_embedding) +CondFunc('ldm.modules.diffusionmodules.openaimodel.timestep_embedding', timestep_embedding, lambda *args, **kwargs: True) +CondFunc('ldm.modules.attention.SpatialTransformer.forward', spatial_transformer_forward, lambda *args, **kwargs: True) CondFunc('ldm.modules.diffusionmodules.openaimodel.timestep_embedding', lambda orig_func, timesteps, *args, **kwargs: orig_func(timesteps, *args, **kwargs).to(torch.float32 if timesteps.dtype == torch.int64 else devices.dtype_unet), unet_needs_upcast) if version.parse(torch.__version__) <= version.parse("1.13.2") or torch.cuda.is_available(): CondFunc('ldm.modules.diffusionmodules.util.GroupNorm32.forward', lambda orig_func, self, *args, **kwargs: orig_func(self.float(), *args, **kwargs), unet_needs_upcast) From dca9007ac7a9852752d91d34d2ed1feaef6a03f2 Mon Sep 17 00:00:00 2001 From: huchenlei Date: Fri, 17 May 2024 13:23:12 -0400 Subject: [PATCH 34/58] Fix SD15 dtype --- modules/sd_models.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/modules/sd_models.py b/modules/sd_models.py index 9c5909168..7d4ab0fd8 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -733,6 +733,10 @@ def load_model(checkpoint_info=None, already_loaded_state_dict=None): sd_model = instantiate_from_config(sd_config.model) sd_model.used_config = checkpoint_config + # ldm's Unet is using self.dtype to cast input tensor. If we do not overwrite + # UnetModel.dtype, it will be the default dtype from config. + # sgm's Unet is not using dtype for casting. The value will be ignored. 
+    sd_model.model.diffusion_model.dtype = devices.dtype_unet
 
     timer.record("create model")
 
From b57a70f37322142939f7429f287599e027108bfc Mon Sep 17 00:00:00 2001
From: huchenlei
Date: Fri, 17 May 2024 13:34:04 -0400
Subject: [PATCH 35/58] Proper fix of SD15 dtype

---
 modules/sd_models.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/modules/sd_models.py b/modules/sd_models.py
index 7d4ab0fd8..26a5127cd 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -541,7 +541,7 @@ def repair_config(sd_config):
     if hasattr(sd_config.model.params, 'unet_config'):
         if shared.cmd_opts.no_half:
             sd_config.model.params.unet_config.params.use_fp16 = False
-        elif shared.cmd_opts.upcast_sampling:
+        elif shared.cmd_opts.upcast_sampling or shared.cmd_opts.precision == "half":
             sd_config.model.params.unet_config.params.use_fp16 = True
 
     if getattr(sd_config.model.params.first_stage_config.params.ddconfig, "attn_type", None) == "vanilla-xformers" and not shared.xformers_available:
@@ -733,10 +733,6 @@ def load_model(checkpoint_info=None, already_loaded_state_dict=None):
         sd_model = instantiate_from_config(sd_config.model)
         sd_model.used_config = checkpoint_config
 
-        # ldm's Unet is using self.dtype to cast input tensor. If we do not overwrite
-        # UnetModel.dtype, it will be the default dtype from config.
-        # sgm's Unet is not using dtype for casting. The value will be ignored.
-        sd_model.model.diffusion_model.dtype = devices.dtype_unet
 
     timer.record("create model")
 
From 1d7448281751ea3223c681a82de8219a6fbe1d22 Mon Sep 17 00:00:00 2001
From: Logan
Date: Sat, 18 May 2024 09:09:57 +1000
Subject: [PATCH 36/58] Default device for sigma tensor to CPU

* Consistent with implementations in k-diffusion.
* Makes this compatible with https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15823

---
 modules/sd_schedulers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/sd_schedulers.py b/modules/sd_schedulers.py
index 0ac1f7a21..4ddb77850 100644
--- a/modules/sd_schedulers.py
+++ b/modules/sd_schedulers.py
@@ -33,7 +33,7 @@ def sgm_uniform(n, sigma_min, sigma_max, inner_model, device):
     sigs += [0.0]
     return torch.FloatTensor(sigs).to(device)
 
-def get_align_your_steps_sigmas(n, sigma_min, sigma_max, device):
+def get_align_your_steps_sigmas(n, sigma_min, sigma_max, device='cpu'):
     # https://research.nvidia.com/labs/toronto-ai/AlignYourSteps/howto.html
     def loglinear_interp(t_steps, num_steps):
         """
 
From 281e0a007b102c7fc9f6150fb88c95470dc25a17 Mon Sep 17 00:00:00 2001
From: Andray
Date: Sat, 18 May 2024 09:13:16 +0400
Subject: [PATCH 37/58] scroll extensions table on overflow

---
 style.css | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/style.css b/style.css
index f6a89b8f9..5ec803a04 100644
--- a/style.css
+++ b/style.css
@@ -807,6 +807,8 @@ table.popup-table .link{
 
 #tab_extensions table{
     border-collapse: collapse;
+    overflow-x: auto;
+    display: block;
 }
 
 #tab_extensions table td, #tab_extensions table th{
 
From feeb6802aa71fad190da2e051e50af84a94eda85 Mon Sep 17 00:00:00 2001
From: drhead <1313496+drhead@users.noreply.github.com>
Date: Sat, 18 May 2024 01:22:31 -0400
Subject: [PATCH 38/58] fix case where first step skipped if skip early cond is 0

---
 modules/sd_samplers_cfg_denoiser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py
index 082a4f63c..d89ea2c8b 100644
--- a/modules/sd_samplers_cfg_denoiser.py
+++ b/modules/sd_samplers_cfg_denoiser.py
@@ -212,7 +212,7 @@ class
CFGDenoiser(torch.nn.Module): uncond = denoiser_params.text_uncond skip_uncond = False - if self.step / self.total_steps <= shared.opts.skip_early_cond: + if shared.opts.skip_early_cond != 0. and self.step / self.total_steps <= shared.opts.skip_early_cond: skip_uncond = True x_in = x_in[:-batch_size] sigma_in = sigma_in[:-batch_size] From 501ac016da8c28ff4778219f142f0622083237ce Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Sat, 18 May 2024 18:37:37 +0900 Subject: [PATCH 39/58] Reformat --- scripts/xyz_grid.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py index b9fd66fe5..b23fd4770 100644 --- a/scripts/xyz_grid.py +++ b/scripts/xyz_grid.py @@ -162,12 +162,14 @@ def apply_override(field, boolean: bool = False): if boolean: x = True if x.lower() == "true" else False p.override_settings[field] = x + return fun def boolean_choice(reverse: bool = False): def choice(): return ["False", "True"] if reverse else ["True", "False"] + return choice @@ -572,7 +574,7 @@ class Script(scripts.Script): mc = re_range_count.fullmatch(val) if m is not None: start = int(m.group(1)) - end = int(m.group(2))+1 + end = int(m.group(2)) + 1 step = int(m.group(3)) if m.group(3) is not None else 1 valslist_ext += list(range(start, end, step)) @@ -725,11 +727,11 @@ class Script(scripts.Script): ydim = len(ys) if vary_seeds_y else 1 if vary_seeds_x: - pc.seed += ix + pc.seed += ix if vary_seeds_y: - pc.seed += iy * xdim + pc.seed += iy * xdim if vary_seeds_z: - pc.seed += iz * xdim * ydim + pc.seed += iz * xdim * ydim try: res = process_images(pc) @@ -797,18 +799,18 @@ class Script(scripts.Script): z_count = len(zs) # Set the grid infotexts to the real ones with extra_generation_params (1 main grid + z_count sub-grids) - processed.infotexts[:1+z_count] = grid_infotext[:1+z_count] + processed.infotexts[:1 + z_count] = grid_infotext[:1 + z_count] if not include_lone_images: # Don't need sub-images anymore, drop from list: - processed.images = processed.images[:z_count+1] + processed.images = processed.images[:z_count + 1] if opts.grid_save: # Auto-save main and sub-grids: grid_count = z_count + 1 if z_count > 1 else 1 for g in range(grid_count): # TODO: See previous comment about intentional data misalignment. 
- adj_g = g-1 if g > 0 else g + adj_g = g - 1 if g > 0 else g images.save_image(processed.images[g], p.outpath_grids, "xyz_grid", info=processed.infotexts[g], extension=opts.grid_format, prompt=processed.all_prompts[adj_g], seed=processed.all_seeds[adj_g], grid=True, p=processed) if not include_sub_grids: # if not include_sub_grids then skip saving after the first grid break From 969a462ac9ea52eb61b8de9fd685cc477c8b8dac Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Sat, 18 May 2024 18:27:34 +0900 Subject: [PATCH 40/58] xyz util confirm_range --- scripts/xyz_grid.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py index b23fd4770..81c7abe95 100644 --- a/scripts/xyz_grid.py +++ b/scripts/xyz_grid.py @@ -95,6 +95,17 @@ def confirm_checkpoints_or_none(p, xs): raise RuntimeError(f"Unknown checkpoint: {x}") +def confirm_range(min_val, max_val, axis_label): + """Generates a AxisOption.confirm() function that checks all values are within the specified range.""" + + def confirm_range_fun(p, xs): + for x in xs: + if not (max_val >= x >= min_val): + raise ValueError(f'{axis_label} value "{x}" out of range [{min_val}, {max_val}]') + + return confirm_range_fun + + def apply_clip_skip(p, x, xs): opts.data["CLIP_stop_at_last_layers"] = x From 24a59ad3d2f9f44130746fdfe54f9f51ba74e77f Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Sat, 18 May 2024 15:36:49 +0900 Subject: [PATCH 41/58] fix Hypertile xyz grid --- .../hypertile/scripts/hypertile_script.py | 17 ++++++- .../hypertile/scripts/hypertile_xyz.py | 51 ------------------- 2 files changed, 15 insertions(+), 53 deletions(-) delete mode 100644 extensions-builtin/hypertile/scripts/hypertile_xyz.py diff --git a/extensions-builtin/hypertile/scripts/hypertile_script.py b/extensions-builtin/hypertile/scripts/hypertile_script.py index 395d584b6..59e7f9907 100644 --- a/extensions-builtin/hypertile/scripts/hypertile_script.py +++ b/extensions-builtin/hypertile/scripts/hypertile_script.py @@ -1,6 +1,5 @@ import hypertile from modules import scripts, script_callbacks, shared -from scripts.hypertile_xyz import add_axis_options class ScriptHypertile(scripts.Script): @@ -93,7 +92,6 @@ def on_ui_settings(): "hypertile_max_depth_unet": shared.OptionInfo(3, "Hypertile U-Net max depth", gr.Slider, {"minimum": 0, "maximum": 3, "step": 1}, infotext="Hypertile U-Net max depth").info("larger = more neural network layers affected; minor effect on performance"), "hypertile_max_tile_unet": shared.OptionInfo(256, "Hypertile U-Net max tile size", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}, infotext="Hypertile U-Net max tile size").info("larger = worse performance"), "hypertile_swap_size_unet": shared.OptionInfo(3, "Hypertile U-Net swap size", gr.Slider, {"minimum": 0, "maximum": 64, "step": 1}, infotext="Hypertile U-Net swap size"), - "hypertile_enable_vae": shared.OptionInfo(False, "Enable Hypertile VAE", infotext="Hypertile VAE").info("minimal change in the generated picture"), "hypertile_max_depth_vae": shared.OptionInfo(3, "Hypertile VAE max depth", gr.Slider, {"minimum": 0, "maximum": 3, "step": 1}, infotext="Hypertile VAE max depth"), "hypertile_max_tile_vae": shared.OptionInfo(128, "Hypertile VAE max tile size", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}, infotext="Hypertile VAE max tile size"), @@ -105,5 +103,20 @@ def on_ui_settings(): shared.opts.add_option(name, opt) +def add_axis_options(): + xyz_grid = [x for x in 
scripts.scripts_data if x.script_class.__module__ == "xyz_grid.py"][0].module + xyz_grid.axis_options.extend([ + xyz_grid.AxisOption("[Hypertile] Unet First pass Enabled", str, xyz_grid.apply_override('hypertile_enable_unet', boolean=True), choices=xyz_grid.boolean_choice(reverse=True)), + xyz_grid.AxisOption("[Hypertile] Unet Second pass Enabled", str, xyz_grid.apply_override('hypertile_enable_unet_secondpass', boolean=True), choices=xyz_grid.boolean_choice(reverse=True)), + xyz_grid.AxisOption("[Hypertile] Unet Max Depth", int, xyz_grid.apply_override("hypertile_max_depth_unet"), confirm=xyz_grid.confirm_range(0, 3, '[Hypertile] Unet Max Depth'), choices=lambda: [str(x) for x in range(4)]), + xyz_grid.AxisOption("[Hypertile] Unet Max Tile Size", int, xyz_grid.apply_override("hypertile_max_tile_unet"), confirm=xyz_grid.confirm_range(0, 512, '[Hypertile] Unet Max Tile Size')), + xyz_grid.AxisOption("[Hypertile] Unet Swap Size", int, xyz_grid.apply_override("hypertile_swap_size_unet"), confirm=xyz_grid.confirm_range(0, 64, '[Hypertile] Unet Swap Size')), + xyz_grid.AxisOption("[Hypertile] VAE Enabled", str, xyz_grid.apply_override('hypertile_enable_vae', boolean=True), choices=xyz_grid.boolean_choice(reverse=True)), + xyz_grid.AxisOption("[Hypertile] VAE Max Depth", int, xyz_grid.apply_override("hypertile_max_depth_vae"), confirm=xyz_grid.confirm_range(0, 3, '[Hypertile] VAE Max Depth'), choices=lambda: [str(x) for x in range(4)]), + xyz_grid.AxisOption("[Hypertile] VAE Max Tile Size", int, xyz_grid.apply_override("hypertile_max_tile_vae"), confirm=xyz_grid.confirm_range(0, 512, '[Hypertile] VAE Max Tile Size')), + xyz_grid.AxisOption("[Hypertile] VAE Swap Size", int, xyz_grid.apply_override("hypertile_swap_size_vae"), confirm=xyz_grid.confirm_range(0, 64, '[Hypertile] VAE Swap Size')), + ]) + + script_callbacks.on_ui_settings(on_ui_settings) script_callbacks.on_before_ui(add_axis_options) diff --git a/extensions-builtin/hypertile/scripts/hypertile_xyz.py b/extensions-builtin/hypertile/scripts/hypertile_xyz.py deleted file mode 100644 index 9e96ae3c5..000000000 --- a/extensions-builtin/hypertile/scripts/hypertile_xyz.py +++ /dev/null @@ -1,51 +0,0 @@ -from modules import scripts -from modules.shared import opts - -xyz_grid = [x for x in scripts.scripts_data if x.script_class.__module__ == "xyz_grid.py"][0].module - -def int_applier(value_name:str, min_range:int = -1, max_range:int = -1): - """ - Returns a function that applies the given value to the given value_name in opts.data. - """ - def validate(value_name:str, value:str): - value = int(value) - # validate value - if not min_range == -1: - assert value >= min_range, f"Value {value} for {value_name} must be greater than or equal to {min_range}" - if not max_range == -1: - assert value <= max_range, f"Value {value} for {value_name} must be less than or equal to {max_range}" - def apply_int(p, x, xs): - validate(value_name, x) - opts.data[value_name] = int(x) - return apply_int - -def bool_applier(value_name:str): - """ - Returns a function that applies the given value to the given value_name in opts.data. 
- """ - def validate(value_name:str, value:str): - assert value.lower() in ["true", "false"], f"Value {value} for {value_name} must be either true or false" - def apply_bool(p, x, xs): - validate(value_name, x) - value_boolean = x.lower() == "true" - opts.data[value_name] = value_boolean - return apply_bool - -def add_axis_options(): - extra_axis_options = [ - xyz_grid.AxisOption("[Hypertile] Unet First pass Enabled", str, bool_applier("hypertile_enable_unet"), choices=xyz_grid.boolean_choice(reverse=True)), - xyz_grid.AxisOption("[Hypertile] Unet Second pass Enabled", str, bool_applier("hypertile_enable_unet_secondpass"), choices=xyz_grid.boolean_choice(reverse=True)), - xyz_grid.AxisOption("[Hypertile] Unet Max Depth", int, int_applier("hypertile_max_depth_unet", 0, 3), choices=lambda: [str(x) for x in range(4)]), - xyz_grid.AxisOption("[Hypertile] Unet Max Tile Size", int, int_applier("hypertile_max_tile_unet", 0, 512)), - xyz_grid.AxisOption("[Hypertile] Unet Swap Size", int, int_applier("hypertile_swap_size_unet", 0, 64)), - xyz_grid.AxisOption("[Hypertile] VAE Enabled", str, bool_applier("hypertile_enable_vae"), choices=xyz_grid.boolean_choice(reverse=True)), - xyz_grid.AxisOption("[Hypertile] VAE Max Depth", int, int_applier("hypertile_max_depth_vae", 0, 3), choices=lambda: [str(x) for x in range(4)]), - xyz_grid.AxisOption("[Hypertile] VAE Max Tile Size", int, int_applier("hypertile_max_tile_vae", 0, 512)), - xyz_grid.AxisOption("[Hypertile] VAE Swap Size", int, int_applier("hypertile_swap_size_vae", 0, 64)), - ] - set_a = {opt.label for opt in xyz_grid.axis_options} - set_b = {opt.label for opt in extra_axis_options} - if set_a.intersection(set_b): - return - - xyz_grid.axis_options.extend(extra_axis_options) From 82884da18c8f183c4ce0e7237953303f26610370 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Sun, 19 May 2024 04:55:45 +0900 Subject: [PATCH 42/58] use apply_override for Clip skip --- scripts/xyz_grid.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py index b9fd66fe5..c7cb51333 100644 --- a/scripts/xyz_grid.py +++ b/scripts/xyz_grid.py @@ -264,7 +264,7 @@ axis_options = [ AxisOption("Schedule max sigma", float, apply_override("sigma_max")), AxisOption("Schedule rho", float, apply_override("rho")), AxisOption("Eta", float, apply_field("eta")), - AxisOption("Clip skip", int, apply_clip_skip), + AxisOption("Clip skip", int, apply_override('CLIP_stop_at_last_layers')), AxisOption("Denoising", float, apply_field("denoising_strength")), AxisOption("Initial noise multiplier", float, apply_field("initial_noise_multiplier")), AxisOption("Extra noise", float, apply_override("img2img_extra_noise")), @@ -399,7 +399,6 @@ def draw_xyz_grid(p, xs, ys, zs, x_labels, y_labels, z_labels, cell, draw_legend class SharedSettingsStackHelper(object): def __enter__(self): - self.CLIP_stop_at_last_layers = opts.CLIP_stop_at_last_layers self.vae = opts.sd_vae self.uni_pc_order = opts.uni_pc_order @@ -409,8 +408,6 @@ class SharedSettingsStackHelper(object): modules.sd_models.reload_model_weights() modules.sd_vae.reload_vae_weights() - opts.data["CLIP_stop_at_last_layers"] = self.CLIP_stop_at_last_layers - re_range = re.compile(r"\s*([+-]?\s*\d+)\s*-\s*([+-]?\s*\d+)(?:\s*\(([+-]\d+)\s*\))?\s*") re_range_float = re.compile(r"\s*([+-]?\s*\d+(?:.\d*)?)\s*-\s*([+-]?\s*\d+(?:.\d*)?)(?:\s*\(([+-]\d+(?:.\d*)?)\s*\))?\s*") From 1f392517f8938e0082e189fa0c28f4eb89fb0eb2 Mon Sep 17 00:00:00 2001 From: w-e-w 
<40751091+w-e-w@users.noreply.github.com>
Date: Sun, 19 May 2024 04:59:05 +0900
Subject: [PATCH 43/58] use override for uni_pc_order

---
 scripts/xyz_grid.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py
index c7cb51333..622cc43c3 100644
--- a/scripts/xyz_grid.py
+++ b/scripts/xyz_grid.py
@@ -140,7 +140,7 @@ def apply_styles(p: StableDiffusionProcessingTxt2Img, x: str, _):
 
 
 def apply_uni_pc_order(p, x, xs):
-    opts.data["uni_pc_order"] = min(x, p.steps - 1)
+    p.override_settings['uni_pc_order'] = min(x, p.steps - 1)
 
 
 def apply_face_restore(p, opt, x):
@@ -400,11 +400,9 @@ class SharedSettingsStackHelper(object):
     def __enter__(self):
         self.vae = opts.sd_vae
-        self.uni_pc_order = opts.uni_pc_order
 
     def __exit__(self, exc_type, exc_value, tb):
         opts.data["sd_vae"] = self.vae
-        opts.data["uni_pc_order"] = self.uni_pc_order
 
         modules.sd_models.reload_model_weights()
         modules.sd_vae.reload_vae_weights()
 
From 1e696b028adbd449df8c30ed760103b120ec5546 Mon Sep 17 00:00:00 2001
From: w-e-w <40751091+w-e-w@users.noreply.github.com>
Date: Sun, 19 May 2024 05:14:32 +0900
Subject: [PATCH 44/58] use override of sd_vae

---
 scripts/xyz_grid.py | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py
index 622cc43c3..4c83e92b2 100644
--- a/scripts/xyz_grid.py
+++ b/scripts/xyz_grid.py
@@ -118,21 +118,16 @@ def apply_size(p, x: str, xs) -> None:
 
 
 def find_vae(name: str):
-    if name.lower() in ['auto', 'automatic']:
-        return modules.sd_vae.unspecified
-    if name.lower() == 'none':
-        return None
-    else:
-        choices = [x for x in sorted(modules.sd_vae.vae_dict, key=lambda x: len(x)) if name.lower().strip() in x.lower()]
-        if len(choices) == 0:
-            print(f"No VAE found for {name}; using automatic")
-            return modules.sd_vae.unspecified
-        else:
-            return modules.sd_vae.vae_dict[choices[0]]
+    match name := name.lower().strip():
+        case 'auto' | 'automatic':
+            return 'Automatic'
+        case 'none':
+            return 'None'
+    return next((k for k in modules.sd_vae.vae_dict if k.lower() == name), print(f'No VAE found for {name}; using Automatic') or 'Automatic')
 
 
 def apply_vae(p, x, xs):
-    modules.sd_vae.reload_vae_weights(shared.sd_model, vae_file=find_vae(x))
+    p.override_settings['sd_vae'] = find_vae(x)
 
 
 def apply_styles(p: StableDiffusionProcessingTxt2Img, x: str, _):
@@ -270,7 +265,7 @@ axis_options = [
     AxisOption("Extra noise", float, apply_override("img2img_extra_noise")),
     AxisOptionTxt2Img("Hires upscaler", str, apply_field("hr_upscaler"), choices=lambda: [*shared.latent_upscale_modes, *[x.name for x in shared.sd_upscalers]]),
     AxisOptionImg2Img("Cond.
Image Mask Weight", float, apply_field("inpainting_mask_weight")), - AxisOption("VAE", str, apply_vae, cost=0.7, choices=lambda: ['None'] + list(sd_vae.vae_dict)), + AxisOption("VAE", str, apply_vae, cost=0.7, choices=lambda: ['Automatic', 'None'] + list(sd_vae.vae_dict)), AxisOption("Styles", str, apply_styles, choices=lambda: list(shared.prompt_styles.styles)), AxisOption("UniPC Order", int, apply_uni_pc_order, cost=0.5), AxisOption("Face restore", str, apply_face_restore, format_value=format_value), @@ -399,10 +394,9 @@ def draw_xyz_grid(p, xs, ys, zs, x_labels, y_labels, z_labels, cell, draw_legend class SharedSettingsStackHelper(object): def __enter__(self): - self.vae = opts.sd_vae + pass def __exit__(self, exc_type, exc_value, tb): - opts.data["sd_vae"] = self.vae modules.sd_models.reload_model_weights() modules.sd_vae.reload_vae_weights() From 51e7122f25c276b258a8f55a64e60e5b2265287f Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Sun, 19 May 2024 05:17:44 +0900 Subject: [PATCH 45/58] remove unused code --- scripts/xyz_grid.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py index 4c83e92b2..23dafd477 100644 --- a/scripts/xyz_grid.py +++ b/scripts/xyz_grid.py @@ -95,17 +95,6 @@ def confirm_checkpoints_or_none(p, xs): raise RuntimeError(f"Unknown checkpoint: {x}") -def apply_clip_skip(p, x, xs): - opts.data["CLIP_stop_at_last_layers"] = x - - -def apply_upscale_latent_space(p, x, xs): - if x.lower().strip() != '0': - opts.data["use_scale_latent_for_hires_fix"] = True - else: - opts.data["use_scale_latent_for_hires_fix"] = False - - def apply_size(p, x: str, xs) -> None: try: width, _, height = x.partition('x') From 5867be2914c303c2f8ba86ff23dba4b31aeafa79 Mon Sep 17 00:00:00 2001 From: viking1304 Date: Mon, 20 May 2024 23:44:17 +0200 Subject: [PATCH 46/58] Use different torch versions for Intel and ARM Macs --- webui-macos-env.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/webui-macos-env.sh b/webui-macos-env.sh index db7e8b1a0..ad0736378 100644 --- a/webui-macos-env.sh +++ b/webui-macos-env.sh @@ -11,7 +11,12 @@ fi export install_dir="$HOME" export COMMANDLINE_ARGS="--skip-torch-cuda-test --upcast-sampling --no-half-vae --use-cpu interrogate" -export TORCH_COMMAND="pip install torch==2.1.0 torchvision==0.16.0" export PYTORCH_ENABLE_MPS_FALLBACK=1 +if [[ "$(sysctl -n machdep.cpu.brand_string)" =~ ^.*"Intel".*$ ]]; then + export TORCH_COMMAND="pip install torch==2.1.2 torchvision==0.16.2" +else + export TORCH_COMMAND="pip install torch==2.3.0 torchvision==0.18.0" +fi + #################################################################### From 344eda55d4550e91b1a3e95f8e669084a74c876f Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Wed, 22 May 2024 23:06:07 +0900 Subject: [PATCH 47/58] ReloadUI backgroundColor --background-fill-primary --- javascript/ui.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/javascript/ui.js b/javascript/ui.js index e0f5feebd..16faacebb 100644 --- a/javascript/ui.js +++ b/javascript/ui.js @@ -337,8 +337,8 @@ onOptionsChanged(function() { let txt2img_textarea, img2img_textarea = undefined; function restart_reload() { + document.body.style.backgroundColor = "var(--background-fill-primary)"; document.body.innerHTML = '
<h1 style="font-family:monospace;margin-top:20%;color:lightgray;text-align:center;">Reloading...</h1>
'; - var requestPing = function() { requestGet("./internal/ping", {}, function(data) { location.reload(); From a63946233b71083f6726006b96fc16e3033ab844 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Sat, 25 May 2024 14:18:05 +0900 Subject: [PATCH 48/58] setuptools==69.5.1 --- requirements_versions.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements_versions.txt b/requirements_versions.txt index 3df74f3d6..3037a395b 100644 --- a/requirements_versions.txt +++ b/requirements_versions.txt @@ -1,3 +1,4 @@ +setuptools==69.5.1 # temp fix for compatibility with some old packages GitPython==3.1.32 Pillow==9.5.0 accelerate==0.21.0 From 6dd53ce63dc70b3fcf7f25402d40b48f50abdf74 Mon Sep 17 00:00:00 2001 From: alcacode Date: Sun, 26 May 2024 15:36:55 +0200 Subject: [PATCH 49/58] Fix bug where file extension had an extra '.' under some circumstances Fix bug where under some circumstances an extra "." was inserted between the file base name and the file extension. The bug is triggered when the extension argument is one of "jpg", "jpeg", or "webp", and the image exceeds the format's dimension limit. Then the extension variable is set to ".png", resulting in the fullfn variable to evaluate to a string ending with "..png". --- modules/images.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/images.py b/modules/images.py index c0ff8a630..1be176cdf 100644 --- a/modules/images.py +++ b/modules/images.py @@ -653,7 +653,7 @@ def save_image(image, path, basename, seed=None, prompt=None, extension='png', i # WebP and JPG formats have maximum dimension limits of 16383 and 65535 respectively. switch to PNG which has a much higher limit if (image.height > 65535 or image.width > 65535) and extension.lower() in ("jpg", "jpeg") or (image.height > 16383 or image.width > 16383) and extension.lower() == "webp": print('Image dimensions too large; saving as PNG') - extension = ".png" + extension = "png" if save_to_dirs is None: save_to_dirs = (grid and opts.grid_save_to_dirs) or (not grid and opts.save_to_dirs and not no_prompt) From 801b72b92b4f07e5d2fa9737b160762ea8f67088 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Tue, 28 May 2024 21:20:23 +0300 Subject: [PATCH 50/58] update changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 295d26c8c..5c16b5611 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +## 1.9.4 + +### Bug Fixes: +* pin setuptools version to fix the startup error ([#15883](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15883)) + ## 1.9.3 ### Bug Fixes: From 8d6f7417385d1cacfd827800bdf02a0e8dd8f092 Mon Sep 17 00:00:00 2001 From: w-e-w <40751091+w-e-w@users.noreply.github.com> Date: Wed, 29 May 2024 03:33:32 +0900 Subject: [PATCH 51/58] #15883 -> #15882 --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c16b5611..596b1ec45 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,7 @@ ## 1.9.4 ### Bug Fixes: -* pin setuptools version to fix the startup error ([#15883](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15883)) +* pin setuptools version to fix the startup error ([#15882](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15882)) ## 1.9.3 From 10f8d0f84216e3642e960ea7118a5acc8a79546f Mon Sep 17 00:00:00 2001 From: eatmoreapple Date: Tue, 4 Jun 2024 15:02:13 +0800 Subject: [PATCH 52/58] feat: lora partial update precede full 
update.

---
 extensions-builtin/Lora/networks.py | 40 +++++++++++++++++++++--------
 1 file changed, 30 insertions(+), 10 deletions(-)

diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py
index 42b14dc23..18809364b 100644
--- a/extensions-builtin/Lora/networks.py
+++ b/extensions-builtin/Lora/networks.py
@@ -260,6 +260,16 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No
 
     loaded_networks.clear()
 
+    unavailable_networks = []
+    for name in names:
+        if name.lower() in forbidden_network_aliases and available_networks.get(name) is None:
+            unavailable_networks.append(name)
+        elif available_network_aliases.get(name) is None:
+            unavailable_networks.append(name)
+
+    if unavailable_networks:
+        update_available_networks_by_names(unavailable_networks)
+
     networks_on_disk = [available_networks.get(name, None) if name.lower() in forbidden_network_aliases else available_network_aliases.get(name, None) for name in names]
     if any(x is None for x in networks_on_disk):
         list_available_networks()
@@ -566,22 +576,16 @@ def network_MultiheadAttention_load_state_dict(self, *args, **kwargs):
     return originals.MultiheadAttention_load_state_dict(self, *args, **kwargs)
 
 
-def list_available_networks():
-    available_networks.clear()
-    available_network_aliases.clear()
-    forbidden_network_aliases.clear()
-    available_network_hash_lookup.clear()
-    forbidden_network_aliases.update({"none": 1, "Addams": 1})
-
-    os.makedirs(shared.cmd_opts.lora_dir, exist_ok=True)
-
+def process_network_files(names: list[str] | None = None):
     candidates = list(shared.walk_files(shared.cmd_opts.lora_dir, allowed_extensions=[".pt", ".ckpt", ".safetensors"]))
     candidates += list(shared.walk_files(shared.cmd_opts.lyco_dir_backcompat, allowed_extensions=[".pt", ".ckpt", ".safetensors"]))
     for filename in candidates:
         if os.path.isdir(filename):
             continue
-
         name = os.path.splitext(os.path.basename(filename))[0]
+        # if names is provided, only load networks with names in the list
+        if names and name not in names:
+            continue
         try:
             entry = network.NetworkOnDisk(name, filename)
         except OSError:  # should catch FileNotFoundError and PermissionError etc.
@@ -597,6 +601,22 @@ def list_available_networks():
             available_network_aliases[entry.alias] = entry
 
 
+def update_available_networks_by_names(names: list[str]):
+    process_network_files(names)
+
+
+def list_available_networks():
+    available_networks.clear()
+    available_network_aliases.clear()
+    forbidden_network_aliases.clear()
+    available_network_hash_lookup.clear()
+    forbidden_network_aliases.update({"none": 1, "Addams": 1})
+
+    os.makedirs(shared.cmd_opts.lora_dir, exist_ok=True)
+
+    process_network_files()
+
+
 re_network_name = re.compile(r"(.*)\s*\([0-9a-fA-F]+\)")
 
From 25bbf31f5701b85804908a54b2f6af38a1d50f1f Mon Sep 17 00:00:00 2001
From: NouberNou
Date: Thu, 6 Jun 2024 16:22:49 -0700
Subject: [PATCH 53/58] Fix for grids without comprehensive infotexts

When generating grids, some scripts such as img2img loopback and ultimate SD
upscale do not pass infotexts for each image, since they all share the same
prompt. If you attempt to save those images using the save button in the UI,
it will fail because it looks up the infotext for the selected image. This
fixes those errors by replicating the shared infotext for as many images as
are passed in the image list when the infotexts parameter is None.
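For illustration, a minimal standalone sketch of the padding behavior described
above. "GridResult" is a hypothetical stand-in for the real Processed class in
modules/processing.py; only the infotexts line mirrors the patch itself.

    # Sketch: pad a single shared infotext to one entry per image, so that
    # index-based lookups (e.g. the UI save button) cannot run out of range.
    class GridResult:
        def __init__(self, images_list, info, infotexts=None):
            self.images_list = images_list
            self.info = info
            # Same idiom as the patch: replicate the shared infotext when no
            # per-image infotexts were supplied by the script.
            self.infotexts = infotexts or [info] * len(images_list)

    result = GridResult(images_list=["grid", "img1", "img2"], info="prompt: a cat")
    assert result.infotexts[2] == "prompt: a cat"  # was an IndexError with plain [info]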
---
 modules/processing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/processing.py b/modules/processing.py
index 76557dd7f..cb37a77df 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -569,7 +569,7 @@ class Processed:
         self.all_negative_prompts = all_negative_prompts or p.all_negative_prompts or [self.negative_prompt]
         self.all_seeds = all_seeds or p.all_seeds or [self.seed]
         self.all_subseeds = all_subseeds or p.all_subseeds or [self.subseed]
-        self.infotexts = infotexts or [info]
+        self.infotexts = infotexts or [info] * len(image_list)
         self.version = program_version()
 
     def js(self):
 
From 53f62674ae55e84aff4d4c9ed104ba9dce8ae887 Mon Sep 17 00:00:00 2001
From: NouberNou
Date: Thu, 6 Jun 2024 16:30:01 -0700
Subject: [PATCH 54/58] Typo on edit

Edited in fix in Github editor and mistyped from local copy

---
 modules/processing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/processing.py b/modules/processing.py
index cb37a77df..c22da4169 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -569,7 +569,7 @@ class Processed:
         self.all_negative_prompts = all_negative_prompts or p.all_negative_prompts or [self.negative_prompt]
         self.all_seeds = all_seeds or p.all_seeds or [self.seed]
         self.all_subseeds = all_subseeds or p.all_subseeds or [self.subseed]
-        self.infotexts = infotexts or [info] * len(image_list)
+        self.infotexts = infotexts or [info] * len(images_list)
         self.version = program_version()
 
     def js(self):
 
From 0769aa318a1896ccf74f57e6e943eb6b5fab5051 Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Sat, 8 Jun 2024 09:05:35 +0300
Subject: [PATCH 55/58] integrated edits as recommended in the PR #15804

---
 modules/sd_hijack_optimizations.py | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)

diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py
index 4c2dc56d4..0269f1f5b 100644
--- a/modules/sd_hijack_optimizations.py
+++ b/modules/sd_hijack_optimizations.py
@@ -486,18 +486,7 @@ def xformers_attention_forward(self, x, context=None, mask=None, **kwargs):
     k_in = self.to_k(context_k)
     v_in = self.to_v(context_v)
 
-    def _reshape(t):
-        """rearrange(t, 'b n (h d) -> b n h d', h=h).
-        Using torch native operations to avoid overhead as this function is
-        called frequently. (70 times/it for SDXL)
-        """
-        b, n, _ = t.shape  # Get the batch size (b) and sequence length (n)
-        d = t.shape[2] // h  # Determine the depth per head
-        return t.reshape(b, n, h, d)
-
-    q = _reshape(q_in)
-    k = _reshape(k_in)
-    v = _reshape(v_in)
+    q, k, v = (t.reshape(t.shape[0], t.shape[1], h, -1) for t in (q_in, k_in, v_in))
 
     del q_in, k_in, v_in
 
@@ -509,7 +498,6 @@
 
     out = out.to(dtype)
 
-    # out = rearrange(out, 'b n h d -> b n (h d)', h=h)
     b, n, h, d = out.shape
     out = out.reshape(b, n, h * d)
     return self.to_out(out)
 
From 5429e4cff514df2f4cab242212ba347741eadc08 Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Sat, 8 Jun 2024 09:56:09 +0300
Subject: [PATCH 56/58] add proper infotext support for #15607

fix settings override not working for NGMS, s_churn, etc...
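The underlying ordering problem, as a minimal standalone sketch (hypothetical
Opts and Job names; in the real code below, the opts reads move out of
StableDiffusionProcessing.__init__ into fill_fields_from_opts, which runs
inside process_images_inner after the override settings are in effect):

    # Sketch: snapshotting a global option at construction time ignores
    # per-job overrides that are written into opts later.
    class Opts:
        s_churn = 0.0

    opts = Opts()

    class Job:
        def __init__(self):
            self.s_churn = None          # not resolved yet
            self.override_settings = {}

        def fill_fields_from_opts(self):
            # Deferred read: runs after overrides have been applied to opts.
            self.s_churn = self.s_churn if self.s_churn is not None else opts.s_churn

    def process(job):
        for k, v in job.override_settings.items():
            setattr(opts, k, v)          # apply per-job overrides first
        job.fill_fields_from_opts()      # only now read the option values

    job = Job()
    job.override_settings["s_churn"] = 5.0
    process(job)
    assert job.s_churn == 5.0  # an __init__-time read would have kept 0.0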
--- modules/processing.py | 14 ++++++++------ modules/sd_samplers_cfg_denoiser.py | 12 +++++++----- modules/shared_options.py | 6 +++--- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/modules/processing.py b/modules/processing.py index c22da4169..97a7162aa 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -238,11 +238,6 @@ class StableDiffusionProcessing: self.styles = [] self.sampler_noise_scheduler_override = None - self.s_min_uncond = self.s_min_uncond if self.s_min_uncond is not None else opts.s_min_uncond - self.s_churn = self.s_churn if self.s_churn is not None else opts.s_churn - self.s_tmin = self.s_tmin if self.s_tmin is not None else opts.s_tmin - self.s_tmax = (self.s_tmax if self.s_tmax is not None else opts.s_tmax) or float('inf') - self.s_noise = self.s_noise if self.s_noise is not None else opts.s_noise self.extra_generation_params = self.extra_generation_params or {} self.override_settings = self.override_settings or {} @@ -259,6 +254,13 @@ class StableDiffusionProcessing: self.cached_uc = StableDiffusionProcessing.cached_uc self.cached_c = StableDiffusionProcessing.cached_c + def fill_fields_from_opts(self): + self.s_min_uncond = self.s_min_uncond if self.s_min_uncond is not None else opts.s_min_uncond + self.s_churn = self.s_churn if self.s_churn is not None else opts.s_churn + self.s_tmin = self.s_tmin if self.s_tmin is not None else opts.s_tmin + self.s_tmax = (self.s_tmax if self.s_tmax is not None else opts.s_tmax) or float('inf') + self.s_noise = self.s_noise if self.s_noise is not None else opts.s_noise + @property def sd_model(self): return shared.sd_model @@ -794,7 +796,6 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments=None, iter "Token merging ratio hr": None if not enable_hr or token_merging_ratio_hr == 0 else token_merging_ratio_hr, "Init image hash": getattr(p, 'init_img_hash', None), "RNG": opts.randn_source if opts.randn_source != "GPU" else None, - "NGMS": None if p.s_min_uncond == 0 else p.s_min_uncond, "Tiling": "True" if p.tiling else None, **p.extra_generation_params, "Version": program_version() if opts.add_version_to_infotext else None, @@ -890,6 +891,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: modules.sd_hijack.model_hijack.apply_circular(p.tiling) modules.sd_hijack.model_hijack.clear_comments() + p.fill_fields_from_opts() p.setup_prompts() if isinstance(seed, list): diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py index d89ea2c8b..f48f58a50 100644 --- a/modules/sd_samplers_cfg_denoiser.py +++ b/modules/sd_samplers_cfg_denoiser.py @@ -214,12 +214,14 @@ class CFGDenoiser(torch.nn.Module): if shared.opts.skip_early_cond != 0. 
and self.step / self.total_steps <= shared.opts.skip_early_cond:
             skip_uncond = True
-            x_in = x_in[:-batch_size]
-            sigma_in = sigma_in[:-batch_size]
-
-        # alternating uncond allows for higher thresholds without the quality loss normally expected from raising it
-        if (self.step % 2 or shared.opts.s_min_uncond_all) and s_min_uncond > 0 and sigma[0] < s_min_uncond and not is_edit_model:
-            skip_uncond = True
+            self.p.extra_generation_params["Skip Early CFG"] = shared.opts.skip_early_cond
+        elif (self.step % 2 or shared.opts.s_min_uncond_all) and s_min_uncond > 0 and sigma[0] < s_min_uncond and not is_edit_model:
+            skip_uncond = True
+            self.p.extra_generation_params["NGMS"] = s_min_uncond
+            if shared.opts.s_min_uncond_all:
+                self.p.extra_generation_params["NGMS all steps"] = shared.opts.s_min_uncond_all
+
+        if skip_uncond:
             x_in = x_in[:-batch_size]
             sigma_in = sigma_in[:-batch_size]
 
diff --git a/modules/shared_options.py b/modules/shared_options.py
index c711fa5f6..05c3d9391 100644
--- a/modules/shared_options.py
+++ b/modules/shared_options.py
@@ -209,8 +209,8 @@ options_templates.update(options_section(('img2img', "img2img", "sd"), {
 
 options_templates.update(options_section(('optimizations', "Optimizations", "sd"), {
     "cross_attention_optimization": OptionInfo("Automatic", "Cross attention optimization", gr.Dropdown, lambda: {"choices": shared_items.cross_attention_optimizations()}),
-    "s_min_uncond": OptionInfo(0.0, "Negative Guidance minimum sigma", gr.Slider, {"minimum": 0.0, "maximum": 15.0, "step": 0.01}).link("PR", "https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/9177").info("skip negative prompt for some steps when the image is almost ready; 0=disable, higher=faster"),
-    "s_min_uncond_all": OptionInfo(False, "NGMS: Skip every step").info("makes Negative Guidance minimum sigma skip negative guidance on every step instead of only half"),
+    "s_min_uncond": OptionInfo(0.0, "Negative Guidance minimum sigma", gr.Slider, {"minimum": 0.0, "maximum": 15.0, "step": 0.01}, infotext='NGMS').link("PR", "https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/9177").info("skip negative prompt for some steps when the image is almost ready; 0=disable, higher=faster"),
+    "s_min_uncond_all": OptionInfo(False, "Negative Guidance minimum sigma all steps", infotext='NGMS all steps').info("By default, NGMS above skips every other step; this makes it skip all steps"),
     "token_merging_ratio": OptionInfo(0.0, "Token merging ratio", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}, infotext='Token merging ratio').link("PR", "https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/9256").info("0=disable, higher=faster"),
     "token_merging_ratio_img2img": OptionInfo(0.0, "Token merging ratio for img2img", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}).info("only applies if non-zero and overrides above"),
     "token_merging_ratio_hr": OptionInfo(0.0, "Token merging ratio for high-res pass", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}, infotext='Token merging ratio hr').info("only applies if non-zero and overrides above"),
@@ -382,7 +382,7 @@ options_templates.update(options_section(('sampler-params', "Sampler parameters"
     'uni_pc_order': OptionInfo(3, "UniPC order", gr.Slider, {"minimum": 1, "maximum": 50, "step": 1}, infotext='UniPC order').info("must be < sampling steps"),
     'uni_pc_lower_order_final': OptionInfo(True, "UniPC lower order final", infotext='UniPC lower order final'),
     'sd_noise_schedule': OptionInfo("Default", "Noise schedule for sampling", gr.Radio, {"choices": ["Default", "Zero
Terminal SNR"]}, infotext="Noise Schedule").info("for use with zero terminal SNR trained models"), - 'skip_early_cond': OptionInfo(0, "Skip CFG during early sampling", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}, infotext="Skip Early CFG").info("CFG will be disabled (set to 1) on early steps, can both improve sample diversity/quality and speed up sampling"), + 'skip_early_cond': OptionInfo(0.0, "Ignore negative prompt during early sampling", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}, infotext="Skip Early CFG").info("disables CFG on a proportion of steps at the beginning of generation; 0=skip none; 1=skip all; can both improve sample diversity/quality and speed up sampling"), })) options_templates.update(options_section(('postprocessing', "Postprocessing", "postprocessing"), { From cd9e9e404955df19a72c832d68888db44ab7b382 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sat, 8 Jun 2024 10:13:38 +0300 Subject: [PATCH 57/58] remove unneeded tabulation --- .../Lora/ui_extra_networks_lora.py | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/extensions-builtin/Lora/ui_extra_networks_lora.py b/extensions-builtin/Lora/ui_extra_networks_lora.py index e35d90c6e..3e34d69dc 100644 --- a/extensions-builtin/Lora/ui_extra_networks_lora.py +++ b/extensions-builtin/Lora/ui_extra_networks_lora.py @@ -60,19 +60,18 @@ class ExtraNetworksPageLora(ui_extra_networks.ExtraNetworksPage): else: sd_version = lora_on_disk.sd_version - if shared.sd_model is not None: # still show LoRA in case an error occurs during initial model loading - if shared.opts.lora_show_all or not enable_filter: - pass - elif sd_version == network.SdVersion.Unknown: - model_version = network.SdVersion.SDXL if shared.sd_model.is_sdxl else network.SdVersion.SD2 if shared.sd_model.is_sd2 else network.SdVersion.SD1 - if model_version.name in shared.opts.lora_hide_unknown_for_versions: - return None - elif shared.sd_model.is_sdxl and sd_version != network.SdVersion.SDXL: - return None - elif shared.sd_model.is_sd2 and sd_version != network.SdVersion.SD2: - return None - elif shared.sd_model.is_sd1 and sd_version != network.SdVersion.SD1: + if shared.opts.lora_show_all or not enable_filter or not shared.sd_model: + pass + elif sd_version == network.SdVersion.Unknown: + model_version = network.SdVersion.SDXL if shared.sd_model.is_sdxl else network.SdVersion.SD2 if shared.sd_model.is_sd2 else network.SdVersion.SD1 + if model_version.name in shared.opts.lora_hide_unknown_for_versions: return None + elif shared.sd_model.is_sdxl and sd_version != network.SdVersion.SDXL: + return None + elif shared.sd_model.is_sd2 and sd_version != network.SdVersion.SD2: + return None + elif shared.sd_model.is_sd1 and sd_version != network.SdVersion.SD1: + return None return item From 603509ec905a9c9ac1011e9531a9da180828fcc0 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sat, 8 Jun 2024 10:54:41 +0300 Subject: [PATCH 58/58] as per wfjsw's suggestion, revert changes for sd_hijack_checkpoint.py --- modules/sd_hijack_checkpoint.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/modules/sd_hijack_checkpoint.py b/modules/sd_hijack_checkpoint.py index b2f05bbdc..2604d969f 100644 --- a/modules/sd_hijack_checkpoint.py +++ b/modules/sd_hijack_checkpoint.py @@ -4,19 +4,16 @@ import ldm.modules.attention import ldm.modules.diffusionmodules.openaimodel -# Setting flag=False so that torch skips checking parameters. 
-# parameters checking is expensive in frequent operations. - def BasicTransformerBlock_forward(self, x, context=None): - return checkpoint(self._forward, x, context, flag=False) + return checkpoint(self._forward, x, context) def AttentionBlock_forward(self, x): - return checkpoint(self._forward, x, flag=False) + return checkpoint(self._forward, x) def ResBlock_forward(self, x, emb): - return checkpoint(self._forward, x, emb, flag=False) + return checkpoint(self._forward, x, emb) stored = []
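As background for the use_checkpoint and checkpoint() changes in this series
(patches 28, 30 and 58), a minimal sketch of gradient checkpointing with
torch.utils.checkpoint; this illustrates the general mechanism under a recent
PyTorch, not the exact ldm/sgm code path:

    import torch
    from torch.utils.checkpoint import checkpoint

    block = torch.nn.Sequential(torch.nn.Linear(16, 16), torch.nn.GELU())
    x = torch.randn(4, 16, requires_grad=True)

    # Training: activations inside `block` are not stored; they are recomputed
    # during backward, trading compute for memory.
    y = checkpoint(block, x, use_reentrant=False)
    y.sum().backward()

    # Inference: there is no backward pass, so checkpointing only adds
    # recomputation and per-parameter checks, which is why these patches
    # disable use_checkpoint when loading models for sampling.
    with torch.no_grad():
        y_eval = block(x)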