From 2b717bb195a3034853ed45a52c5752f010e1302b Mon Sep 17 00:00:00 2001
From: w-e-w <40751091+w-e-w@users.noreply.github.com>
Date: Tue, 23 Apr 2024 02:35:25 +0900
Subject: [PATCH 01/39] fix initial corrupt model loop

If for some reason the initial model loaded during the loading phase of webui is corrupted,
then after entering this state the user will not be able to load a model even if a good one is selected, because in unload_model_weights > send_model_to_cpu the m.lowvram attribute check fails when m is None.
webui will be stuck in this loop, unable to recover without manual intervention.
---
 modules/sd_models.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/modules/sd_models.py b/modules/sd_models.py
index ff245b7a6..1747ca621 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -659,10 +659,11 @@ def get_empty_cond(sd_model):
 
 
 def send_model_to_cpu(m):
-    if m.lowvram:
-        lowvram.send_everything_to_cpu()
-    else:
-        m.to(devices.cpu)
+    if m is not None:
+        if m.lowvram:
+            lowvram.send_everything_to_cpu()
+        else:
+            m.to(devices.cpu)
 
     devices.torch_gc()
 

From 4bc39d234d6535e3d8f8531d0c0f4e049261c922 Mon Sep 17 00:00:00 2001
From: w-e-w <40751091+w-e-w@users.noreply.github.com>
Date: Tue, 23 Apr 2024 02:39:45 +0900
Subject: [PATCH 02/39] Show LoRA if model is None

---
 .../Lora/ui_extra_networks_lora.py            | 23 ++++++++++---------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/extensions-builtin/Lora/ui_extra_networks_lora.py b/extensions-builtin/Lora/ui_extra_networks_lora.py
index b627f7dc2..e35d90c6e 100644
--- a/extensions-builtin/Lora/ui_extra_networks_lora.py
+++ b/extensions-builtin/Lora/ui_extra_networks_lora.py
@@ -60,18 +60,19 @@ class ExtraNetworksPageLora(ui_extra_networks.ExtraNetworksPage):
         else:
             sd_version = lora_on_disk.sd_version
 
-        if shared.opts.lora_show_all or not enable_filter:
-            pass
-        elif sd_version == network.SdVersion.Unknown:
-            model_version = network.SdVersion.SDXL if shared.sd_model.is_sdxl else network.SdVersion.SD2 if shared.sd_model.is_sd2 else network.SdVersion.SD1
-            if model_version.name in shared.opts.lora_hide_unknown_for_versions:
+        if shared.sd_model is not None:  # still show LoRA in case an error occurs during initial model loading
+            if shared.opts.lora_show_all or not enable_filter:
+                pass
+            elif sd_version == network.SdVersion.Unknown:
+                model_version = network.SdVersion.SDXL if shared.sd_model.is_sdxl else network.SdVersion.SD2 if shared.sd_model.is_sd2 else network.SdVersion.SD1
+                if model_version.name in shared.opts.lora_hide_unknown_for_versions:
+                    return None
+            elif shared.sd_model.is_sdxl and sd_version != network.SdVersion.SDXL:
+                return None
+            elif shared.sd_model.is_sd2 and sd_version != network.SdVersion.SD2:
+                return None
+            elif shared.sd_model.is_sd1 and sd_version != network.SdVersion.SD1:
                 return None
-        elif shared.sd_model.is_sdxl and sd_version != network.SdVersion.SDXL:
-            return None
-        elif shared.sd_model.is_sd2 and sd_version != network.SdVersion.SD2:
-            return None
-        elif shared.sd_model.is_sd1 and sd_version != network.SdVersion.SD1:
-            return None
 
         return item
 

From a1aa0af8a45f4c30f1d3fce5635c090d64d4e55b Mon Sep 17 00:00:00 2001
From: drhead <1313496+drhead@users.noreply.github.com>
Date: Mon, 22 Apr 2024 23:38:44 -0400
Subject: [PATCH 03/39] add code for skipping CFG on early steps

---
 modules/sd_samplers_cfg_denoiser.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py
index 93581c9ac..8ccc837aa 100644
--- a/modules/sd_samplers_cfg_denoiser.py
+++ b/modules/sd_samplers_cfg_denoiser.py
@@ -212,6 +212,11 @@ class CFGDenoiser(torch.nn.Module):
         uncond = denoiser_params.text_uncond
         skip_uncond = False
 
+        if self.step < shared.opts.skip_cond_steps:
+            skip_uncond = True
+            x_in = x_in[:-batch_size]
+            sigma_in = sigma_in[:-batch_size]
+
         # alternating uncond allows for higher thresholds without the quality loss normally expected from raising it
         if self.step % 2 and s_min_uncond > 0 and sigma[0] < s_min_uncond and not is_edit_model:
             skip_uncond = True

From 8016d78a4b9c8bdd02b0031694ad56553f89161e Mon Sep 17 00:00:00 2001
From: drhead <1313496+drhead@users.noreply.github.com>
Date: Mon, 22 Apr 2024 23:42:24 -0400
Subject: [PATCH 04/39] add option for early cfg skip

---
 modules/shared_options.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/modules/shared_options.py b/modules/shared_options.py
index 326a317e0..2f70ef65a 100644
--- a/modules/shared_options.py
+++ b/modules/shared_options.py
@@ -380,7 +380,8 @@ options_templates.update(options_section(('sampler-params', "Sampler parameters"
     'uni_pc_skip_type': OptionInfo("time_uniform", "UniPC skip type", gr.Radio, {"choices": ["time_uniform", "time_quadratic", "logSNR"]}, infotext='UniPC skip type'),
     'uni_pc_order': OptionInfo(3, "UniPC order", gr.Slider, {"minimum": 1, "maximum": 50, "step": 1}, infotext='UniPC order').info("must be < sampling steps"),
     'uni_pc_lower_order_final': OptionInfo(True, "UniPC lower order final", infotext='UniPC lower order final'),
-    'sd_noise_schedule': OptionInfo("Default", "Noise schedule for sampling", gr.Radio, {"choices": ["Default", "Zero Terminal SNR"]}, infotext="Noise Schedule").info("for use with zero terminal SNR trained models")
+    'sd_noise_schedule': OptionInfo("Default", "Noise schedule for sampling", gr.Radio, {"choices": ["Default", "Zero Terminal SNR"]}, infotext="Noise Schedule").info("for use with zero terminal SNR trained models"),
+    'skip_cond_steps': OptionInfo(0, "Skip CFG on first N steps of sampling", gr.Slider, {"minimum": 0, "maximum": 50, "step": 1}, infotext="Skip CFG first steps"),
 }))
 
 options_templates.update(options_section(('postprocessing', "Postprocessing", "postprocessing"), {

From 83266205d0b55ddbff34ea36b47f69c5ea11cc28 Mon Sep 17 00:00:00 2001
From: drhead <1313496+drhead@users.noreply.github.com>
Date: Tue, 23 Apr 2024 00:09:43 -0400
Subject: [PATCH 05/39] Add KL Optimal scheduler

---
 modules/sd_schedulers.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/modules/sd_schedulers.py b/modules/sd_schedulers.py
index 75eb3ac03..10ae4e081 100644
--- a/modules/sd_schedulers.py
+++ b/modules/sd_schedulers.py
@@ -31,6 +31,15 @@ def sgm_uniform(n, sigma_min, sigma_max, inner_model, device):
     return torch.FloatTensor(sigs).to(device)
 
 
+def kl_optimal(n, sigma_min, sigma_max, device):
+    alpha_min = torch.arctan(torch.tensor(sigma_min, device=device))
+    alpha_max = torch.arctan(torch.tensor(sigma_max, device=device))
+    sigmas = torch.empty((n+1,), device=device)
+    for i in range(n+1):
+        sigmas[i] = torch.tan((i/n) * alpha_min + (1.0-i/n) * alpha_max)
+    return sigmas
+
+
 schedulers = [
     Scheduler('automatic', 'Automatic', None),
     Scheduler('uniform', 'Uniform', uniform, need_inner_model=True),
@@ -38,6 +47,7 @@ schedulers = [
     Scheduler('exponential', 'Exponential', k_diffusion.sampling.get_sigmas_exponential),
     Scheduler('polyexponential', 'Polyexponential', k_diffusion.sampling.get_sigmas_polyexponential, default_rho=1.0),
     Scheduler('sgm_uniform', 'SGM Uniform', sgm_uniform, need_inner_model=True, aliases=["SGMUniform"]),
+    Scheduler('kl_optimal', 'KL Optimal', kl_optimal),
 ]
 
 schedulers_map = {**{x.name: x for x in schedulers}, **{x.label: x for x in schedulers}}

From 83182d2799f12ee2b5e5425d750db062ad67eb90 Mon Sep 17 00:00:00 2001
From: drhead <1313496+drhead@users.noreply.github.com>
Date: Tue, 23 Apr 2024 03:07:25 -0400
Subject: [PATCH 06/39] change skip early cond option name and to float

---
 modules/shared_options.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/shared_options.py b/modules/shared_options.py
index 2f70ef65a..91ba72b5e 100644
--- a/modules/shared_options.py
+++ b/modules/shared_options.py
@@ -381,7 +381,7 @@ options_templates.update(options_section(('sampler-params', "Sampler parameters"
     'uni_pc_order': OptionInfo(3, "UniPC order", gr.Slider, {"minimum": 1, "maximum": 50, "step": 1}, infotext='UniPC order').info("must be < sampling steps"),
     'uni_pc_lower_order_final': OptionInfo(True, "UniPC lower order final", infotext='UniPC lower order final'),
     'sd_noise_schedule': OptionInfo("Default", "Noise schedule for sampling", gr.Radio, {"choices": ["Default", "Zero Terminal SNR"]}, infotext="Noise Schedule").info("for use with zero terminal SNR trained models"),
-    'skip_cond_steps': OptionInfo(0, "Skip CFG on first N steps of sampling", gr.Slider, {"minimum": 0, "maximum": 50, "step": 1}, infotext="Skip CFG first steps"),
+    'skip_early_cond': OptionInfo(0, "Skip CFG during early sampling", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}, infotext="Skip Early CFG").info("CFG will be disabled (set to 1) on early steps, can both improve sample diversity/quality and speed up sampling"),
 }))
 
 options_templates.update(options_section(('postprocessing', "Postprocessing", "postprocessing"), {

From 6e9b69a33853e1bcee81cea6f01cf13de612fef7 Mon Sep 17 00:00:00 2001
From: drhead <1313496+drhead@users.noreply.github.com>
Date: Tue, 23 Apr 2024 03:08:28 -0400
Subject: [PATCH 07/39] change skip_early_cond code to use float

---
 modules/sd_samplers_cfg_denoiser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py
index 8ccc837aa..fba5c48c0 100644
--- a/modules/sd_samplers_cfg_denoiser.py
+++ b/modules/sd_samplers_cfg_denoiser.py
@@ -212,7 +212,7 @@ class CFGDenoiser(torch.nn.Module):
         uncond = denoiser_params.text_uncond
         skip_uncond = False
 
-        if self.step < shared.opts.skip_cond_steps:
+        if self.step / self.total_steps <= shared.opts.skip_early_cond:
             skip_uncond = True
             x_in = x_in[:-batch_size]
             sigma_in = sigma_in[:-batch_size]

From 33cbbf9f8b46666a2325c98b723b6cb2ec192ef7 Mon Sep 17 00:00:00 2001
From: drhead <1313496+drhead@users.noreply.github.com>
Date: Tue, 23 Apr 2024 03:15:00 -0400
Subject: [PATCH 08/39] add s_min_uncond_all option

---
 modules/shared_options.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modules/shared_options.py b/modules/shared_options.py
index 91ba72b5e..c711fa5f6 100644
--- a/modules/shared_options.py
+++ b/modules/shared_options.py
@@ -210,6 +210,7 @@ options_templates.update(options_section(('img2img', "img2img", "sd"), {
 options_templates.update(options_section(('optimizations', "Optimizations", "sd"), {
     "cross_attention_optimization": OptionInfo("Automatic", "Cross attention optimization", gr.Dropdown, lambda: {"choices": shared_items.cross_attention_optimizations()}),
     "s_min_uncond": OptionInfo(0.0, "Negative Guidance minimum sigma", gr.Slider, {"minimum": 0.0, "maximum": 15.0, "step": 0.01}).link("PR", "https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/9177").info("skip negative prompt for some steps when the image is almost ready; 0=disable, higher=faster"),
+    "s_min_uncond_all": OptionInfo(False, "NGMS: Skip every step").info("makes Negative Guidance minimum sigma skip negative guidance on every step instead of only half"),
     "token_merging_ratio": OptionInfo(0.0, "Token merging ratio", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}, infotext='Token merging ratio').link("PR", "https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/9256").info("0=disable, higher=faster"),
     "token_merging_ratio_img2img": OptionInfo(0.0, "Token merging ratio for img2img", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}).info("only applies if non-zero and overrides above"),
     "token_merging_ratio_hr": OptionInfo(0.0, "Token merging ratio for high-res pass", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}, infotext='Token merging ratio hr').info("only applies if non-zero and overrides above"),

From 029adbe5318b57c04dbc0d92273cce38e1ecf457 Mon Sep 17 00:00:00 2001
From: drhead <1313496+drhead@users.noreply.github.com>
Date: Tue, 23 Apr 2024 03:15:56 -0400
Subject: [PATCH 09/39] implement option to skip uncond on all steps below ngms

---
 modules/sd_samplers_cfg_denoiser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py
index fba5c48c0..082a4f63c 100644
--- a/modules/sd_samplers_cfg_denoiser.py
+++ b/modules/sd_samplers_cfg_denoiser.py
@@ -218,7 +218,7 @@ class CFGDenoiser(torch.nn.Module):
             sigma_in = sigma_in[:-batch_size]
 
         # alternating uncond allows for higher thresholds without the quality loss normally expected from raising it
-        if self.step % 2 and s_min_uncond > 0 and sigma[0] < s_min_uncond and not is_edit_model:
+        if (self.step % 2 or shared.opts.s_min_uncond_all) and s_min_uncond > 0 and sigma[0] < s_min_uncond and not is_edit_model:
             skip_uncond = True
             x_in = x_in[:-batch_size]
             sigma_in = sigma_in[:-batch_size]

From 3a215deff23d28c06c8de98423c12628b8ce6326 Mon Sep 17 00:00:00 2001
From: drhead <1313496+drhead@users.noreply.github.com>
Date: Sun, 28 Apr 2024 00:15:58 -0400
Subject: [PATCH 10/39] vectorize kl-optimal sigma calculation

Co-authored-by: mamei16 <marcel.1710@live.de>
---
 modules/sd_schedulers.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/modules/sd_schedulers.py b/modules/sd_schedulers.py
index 10ae4e081..99a6f7be2 100644
--- a/modules/sd_schedulers.py
+++ b/modules/sd_schedulers.py
@@ -34,9 +34,8 @@ def sgm_uniform(n, sigma_min, sigma_max, inner_model, device):
 def kl_optimal(n, sigma_min, sigma_max, device):
     alpha_min = torch.arctan(torch.tensor(sigma_min, device=device))
     alpha_max = torch.arctan(torch.tensor(sigma_max, device=device))
-    sigmas = torch.empty((n+1,), device=device)
-    for i in range(n+1):
-        sigmas[i] = torch.tan((i/n) * alpha_min + (1.0-i/n) * alpha_max)
+    step_indices = torch.arange(n + 1, device=device)
+    sigmas = torch.tan(step_indices / n * alpha_min + (1.0 - step_indices / n) * alpha_max)
     return sigmas
 
 

From 579f1ef278080ff7545be3a42c5fe36fc2890887 Mon Sep 17 00:00:00 2001
From: missionfloyd <missionfloyd@users.noreply.github.com>
Date: Sun, 28 Apr 2024 22:36:43 -0600
Subject: [PATCH 11/39] Allow old sampler names in API

---
 modules/api/api.py | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/modules/api/api.py b/modules/api/api.py
index f468c3852..b1201fe77 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -48,6 +48,15 @@ def validate_sampler_name(name):
     return name
 
 
+def parse_old_sampler_name(name):
+    for scheduler in sd_schedulers.schedulers:
+        for scheduler_name in [scheduler.label, scheduler.name, *(scheduler.aliases or [])]:
+            if name.endswith(" " + scheduler_name):
+                return name[0:-(len(scheduler_name) + 1)], scheduler_name
+
+    return name, "Automatic"
+
+
 def setUpscalers(req: dict):
     reqDict = vars(req)
     reqDict['extras_upscaler_1'] = reqDict.pop('upscaler_1', None)
@@ -438,15 +447,19 @@ class Api:
         self.apply_infotext(txt2imgreq, "txt2img", script_runner=script_runner, mentioned_script_args=infotext_script_args)
 
         selectable_scripts, selectable_script_idx = self.get_selectable_script(txt2imgreq.script_name, script_runner)
+        sampler, scheduler = parse_old_sampler_name(txt2imgreq.sampler_name or txt2imgreq.sampler_index)
 
         populate = txt2imgreq.copy(update={  # Override __init__ params
-            "sampler_name": validate_sampler_name(txt2imgreq.sampler_name or txt2imgreq.sampler_index),
+            "sampler_name": validate_sampler_name(sampler),
             "do_not_save_samples": not txt2imgreq.save_images,
             "do_not_save_grid": not txt2imgreq.save_images,
         })
         if populate.sampler_name:
             populate.sampler_index = None  # prevent a warning later on
 
+        if not populate.scheduler:
+            populate.scheduler = scheduler
+
         args = vars(populate)
         args.pop('script_name', None)
         args.pop('script_args', None) # will refeed them to the pipeline directly after initializing them
@@ -502,9 +515,10 @@ class Api:
         self.apply_infotext(img2imgreq, "img2img", script_runner=script_runner, mentioned_script_args=infotext_script_args)
 
         selectable_scripts, selectable_script_idx = self.get_selectable_script(img2imgreq.script_name, script_runner)
+        sampler, scheduler = parse_old_sampler_name(img2imgreq.sampler_name or img2imgreq.sampler_index)
 
         populate = img2imgreq.copy(update={  # Override __init__ params
-            "sampler_name": validate_sampler_name(img2imgreq.sampler_name or img2imgreq.sampler_index),
+            "sampler_name": validate_sampler_name(sampler),
             "do_not_save_samples": not img2imgreq.save_images,
             "do_not_save_grid": not img2imgreq.save_images,
             "mask": mask,
@@ -512,6 +526,9 @@ class Api:
         if populate.sampler_name:
             populate.sampler_index = None  # prevent a warning later on
 
+        if not populate.scheduler:
+            populate.scheduler = scheduler
+
         args = vars(populate)
         args.pop('include_init_images', None)  # this is meant to be done by "exclude": True in model, but it's for a reason that I cannot determine.
         args.pop('script_name', None)

From c8336c45b98c2226923503e17b1d7f9170af0f8a Mon Sep 17 00:00:00 2001
From: missionfloyd <missionfloyd@users.noreply.github.com>
Date: Tue, 30 Apr 2024 01:53:41 -0600
Subject: [PATCH 12/39] Use existing function for old sampler names

---
 modules/api/api.py | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/modules/api/api.py b/modules/api/api.py
index b1201fe77..d8e54529b 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -48,15 +48,6 @@ def validate_sampler_name(name):
     return name
 
 
-def parse_old_sampler_name(name):
-    for scheduler in sd_schedulers.schedulers:
-        for scheduler_name in [scheduler.label, scheduler.name, *(scheduler.aliases or [])]:
-            if name.endswith(" " + scheduler_name):
-                return name[0:-(len(scheduler_name) + 1)], scheduler_name
-
-    return name, "Automatic"
-
-
 def setUpscalers(req: dict):
     reqDict = vars(req)
     reqDict['extras_upscaler_1'] = reqDict.pop('upscaler_1', None)
@@ -447,7 +438,7 @@ class Api:
         self.apply_infotext(txt2imgreq, "txt2img", script_runner=script_runner, mentioned_script_args=infotext_script_args)
 
         selectable_scripts, selectable_script_idx = self.get_selectable_script(txt2imgreq.script_name, script_runner)
-        sampler, scheduler = parse_old_sampler_name(txt2imgreq.sampler_name or txt2imgreq.sampler_index)
+        sampler, scheduler = sd_samplers.get_sampler_and_scheduler(txt2imgreq.sampler_name or txt2imgreq.sampler_index, txt2imgreq.scheduler)
 
         populate = txt2imgreq.copy(update={  # Override __init__ params
             "sampler_name": validate_sampler_name(sampler),
@@ -457,7 +448,7 @@ class Api:
         if populate.sampler_name:
             populate.sampler_index = None  # prevent a warning later on
 
-        if not populate.scheduler:
+        if not populate.scheduler and scheduler != "Automatic":
             populate.scheduler = scheduler
 
         args = vars(populate)
@@ -515,7 +506,7 @@ class Api:
         self.apply_infotext(img2imgreq, "img2img", script_runner=script_runner, mentioned_script_args=infotext_script_args)
 
         selectable_scripts, selectable_script_idx = self.get_selectable_script(img2imgreq.script_name, script_runner)
-        sampler, scheduler = parse_old_sampler_name(img2imgreq.sampler_name or img2imgreq.sampler_index)
+        sampler, scheduler = sd_samplers.get_sampler_and_scheduler(img2imgreq.sampler_name or img2imgreq.sampler_index, img2imgreq.scheduler)
 
         populate = img2imgreq.copy(update={  # Override __init__ params
             "sampler_name": validate_sampler_name(sampler),
@@ -526,7 +517,7 @@ class Api:
         if populate.sampler_name:
             populate.sampler_index = None  # prevent a warning later on
 
-        if not populate.scheduler:
+        if not populate.scheduler and scheduler != "Automatic":
             populate.scheduler = scheduler
 
         args = vars(populate)

From 5d5224b322e8dbd817469a32d6c5578faff2df2f Mon Sep 17 00:00:00 2001
From: w-e-w <40751091+w-e-w@users.noreply.github.com>
Date: Thu, 2 May 2024 02:25:16 +0900
Subject: [PATCH 13/39] fix_p_invalid_sampler_and_scheduler

---
 modules/processing.py  | 3 +++
 modules/sd_samplers.py | 9 ++++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/modules/processing.py b/modules/processing.py
index 76557dd7f..cb646e2bf 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -842,6 +842,9 @@ def process_images(p: StableDiffusionProcessing) -> Processed:
 
         sd_models.apply_token_merging(p.sd_model, p.get_token_merging_ratio())
 
+        # backwards compatibility, fix sampler and scheduler if invalid
+        sd_samplers.fix_p_invalid_sampler_and_scheduler(p)
+
         res = process_images_inner(p)
 
     finally:
diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py
index 6b7b84b6d..b8abac4a9 100644
--- a/modules/sd_samplers.py
+++ b/modules/sd_samplers.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import functools
-
+import logging
 from modules import sd_samplers_kdiffusion, sd_samplers_timesteps, sd_samplers_lcm, shared, sd_samplers_common, sd_schedulers
 
 # imports for functions that previously were here and are used by other modules
@@ -122,4 +122,11 @@ def get_sampler_and_scheduler(sampler_name, scheduler_name):
     return sampler.name, found_scheduler.label
 
 
+def fix_p_invalid_sampler_and_scheduler(p):
+    i_sampler_name, i_scheduler = p.sampler_name, p.scheduler
+    p.sampler_name, p.scheduler = get_sampler_and_scheduler(p.sampler_name, p.scheduler)
+    if p.sampler_name != i_sampler_name or i_scheduler != p.scheduler:
+        logging.warning(f'Sampler Scheduler autocorrection: "{i_sampler_name}" -> "{p.sampler_name}", "{i_scheduler}" -> "{p.scheduler}"')
+
+
 set_samplers()

From 73d1caf8f28a387f2db5a77a8892edad8ed505a0 Mon Sep 17 00:00:00 2001
From: Logan <loganbooker@gmail.com>
Date: Fri, 10 May 2024 12:38:10 +1000
Subject: [PATCH 14/39] Add Align Your Steps to available schedulers

* Include both SDXL and SD 1.5 variants (https://research.nvidia.com/labs/toronto-ai/AlignYourSteps/howto.html)
---
 modules/sd_schedulers.py | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/modules/sd_schedulers.py b/modules/sd_schedulers.py
index 75eb3ac03..2131eae46 100644
--- a/modules/sd_schedulers.py
+++ b/modules/sd_schedulers.py
@@ -4,6 +4,7 @@ import torch
 
 import k_diffusion
 
+import numpy as np
 
 @dataclasses.dataclass
 class Scheduler:
@@ -30,6 +31,35 @@ def sgm_uniform(n, sigma_min, sigma_max, inner_model, device):
     sigs += [0.0]
     return torch.FloatTensor(sigs).to(device)
 
+def get_align_your_steps_sigmas(n, device, sigma_id):
+    # https://research.nvidia.com/labs/toronto-ai/AlignYourSteps/howto.html
+    def loglinear_interp(t_steps, num_steps):
+        """
+        Performs log-linear interpolation of a given array of decreasing numbers.
+        """
+        xs = np.linspace(0, 1, len(t_steps))
+        ys = np.log(t_steps[::-1])
+
+        new_xs = np.linspace(0, 1, num_steps)
+        new_ys = np.interp(new_xs, xs, ys)
+
+        interped_ys = np.exp(new_ys)[::-1].copy()
+        return interped_ys
+
+    if sigma_id == "sdxl":
+        sigmas = [14.615, 6.315, 3.771, 2.181, 1.342, 0.862, 0.555, 0.380, 0.234, 0.113, 0.029]
+    elif sigma_id == "sd15":
+        sigmas = [14.615, 6.475, 3.861, 2.697, 1.886, 1.396, 0.963, 0.652, 0.399, 0.152, 0.029]
+    else:
+        print(f'Align Your Steps sigma identifier "{sigma_id}" not recognized, defaulting to SD 1.5.')
+        sigmas = [14.615, 6.475, 3.861, 2.697, 1.886, 1.396, 0.963, 0.652, 0.399, 0.152, 0.029]
+
+    if n != len(sigmas):
+        sigmas = np.append(loglinear_interp(sigmas, n), [0.0])
+    else:
+        sigmas.append(0.0)
+
+    return torch.FloatTensor(sigmas).to(device)
 
 schedulers = [
     Scheduler('automatic', 'Automatic', None),
@@ -38,6 +68,8 @@ schedulers = [
     Scheduler('exponential', 'Exponential', k_diffusion.sampling.get_sigmas_exponential),
     Scheduler('polyexponential', 'Polyexponential', k_diffusion.sampling.get_sigmas_polyexponential, default_rho=1.0),
     Scheduler('sgm_uniform', 'SGM Uniform', sgm_uniform, need_inner_model=True, aliases=["SGMUniform"]),
+    Scheduler('align_your_steps_sdxl', 'Align Your Steps (SDXL)', lambda n, sigma_min, sigma_max, device: get_align_your_steps_sigmas(n, device, "sdxl")),
+    Scheduler('align_your_steps_sd15', 'Align Your Steps (SD 1.5)', lambda n, sigma_min, sigma_max, device: get_align_your_steps_sigmas(n, device, "sd15")),
 ]
 
 schedulers_map = {**{x.name: x for x in schedulers}, **{x.label: x for x in schedulers}}

From d6b4444069d36cf7554eb9932061ecf43e9b1335 Mon Sep 17 00:00:00 2001
From: Logan <loganbooker@gmail.com>
Date: Fri, 10 May 2024 18:05:45 +1000
Subject: [PATCH 15/39] Use shared.sd_model.is_sdxl to determine base AYS
 sigmas

---
 modules/sd_schedulers.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/modules/sd_schedulers.py b/modules/sd_schedulers.py
index 2131eae46..0ac1f7a21 100644
--- a/modules/sd_schedulers.py
+++ b/modules/sd_schedulers.py
@@ -6,6 +6,8 @@ import k_diffusion
 
 import numpy as np
 
+from modules import shared
+
 @dataclasses.dataclass
 class Scheduler:
     name: str
@@ -31,7 +33,7 @@ def sgm_uniform(n, sigma_min, sigma_max, inner_model, device):
     sigs += [0.0]
     return torch.FloatTensor(sigs).to(device)
 
-def get_align_your_steps_sigmas(n, device, sigma_id):
+def get_align_your_steps_sigmas(n, sigma_min, sigma_max, device):
     # https://research.nvidia.com/labs/toronto-ai/AlignYourSteps/howto.html
     def loglinear_interp(t_steps, num_steps):
         """
@@ -46,12 +48,10 @@ def get_align_your_steps_sigmas(n, device, sigma_id):
         interped_ys = np.exp(new_ys)[::-1].copy()
         return interped_ys
 
-    if sigma_id == "sdxl":
+    if shared.sd_model.is_sdxl:
         sigmas = [14.615, 6.315, 3.771, 2.181, 1.342, 0.862, 0.555, 0.380, 0.234, 0.113, 0.029]
-    elif sigma_id == "sd15":
-        sigmas = [14.615, 6.475, 3.861, 2.697, 1.886, 1.396, 0.963, 0.652, 0.399, 0.152, 0.029]
     else:
-        print(f'Align Your Steps sigma identifier "{sigma_id}" not recognized, defaulting to SD 1.5.')
+        # Default to SD 1.5 sigmas.
         sigmas = [14.615, 6.475, 3.861, 2.697, 1.886, 1.396, 0.963, 0.652, 0.399, 0.152, 0.029]
 
     if n != len(sigmas):
@@ -68,8 +68,7 @@ schedulers = [
     Scheduler('exponential', 'Exponential', k_diffusion.sampling.get_sigmas_exponential),
     Scheduler('polyexponential', 'Polyexponential', k_diffusion.sampling.get_sigmas_polyexponential, default_rho=1.0),
     Scheduler('sgm_uniform', 'SGM Uniform', sgm_uniform, need_inner_model=True, aliases=["SGMUniform"]),
-    Scheduler('align_your_steps_sdxl', 'Align Your Steps (SDXL)', lambda n, sigma_min, sigma_max, device: get_align_your_steps_sigmas(n, device, "sdxl")),
-    Scheduler('align_your_steps_sd15', 'Align Your Steps (SD 1.5)', lambda n, sigma_min, sigma_max, device: get_align_your_steps_sigmas(n, device, "sd15")),
+    Scheduler('align_your_steps', 'Align Your Steps', get_align_your_steps_sigmas),
 ]
 
 schedulers_map = {**{x.name: x for x in schedulers}, **{x.label: x for x in schedulers}}

From 0e98529365477a4f240b2ac67d94ff59235144c5 Mon Sep 17 00:00:00 2001
From: huchenlei <chenlei.hu@mail.utoronto.ca>
Date: Wed, 15 May 2024 15:46:53 -0400
Subject: [PATCH 16/39] Replace einops.rearrange with torch native

---
 modules/sd_hijack_optimizations.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py
index 7f9e328d0..4c2dc56d4 100644
--- a/modules/sd_hijack_optimizations.py
+++ b/modules/sd_hijack_optimizations.py
@@ -486,7 +486,19 @@ def xformers_attention_forward(self, x, context=None, mask=None, **kwargs):
     k_in = self.to_k(context_k)
     v_in = self.to_v(context_v)
 
-    q, k, v = (rearrange(t, 'b n (h d) -> b n h d', h=h) for t in (q_in, k_in, v_in))
+    def _reshape(t):
+        """rearrange(t, 'b n (h d) -> b n h d', h=h).
+        Using torch native operations to avoid overhead as this function is
+        called frequently. (70 times/it for SDXL)
+        """
+        b, n, _ = t.shape  # Get the batch size (b) and sequence length (n)
+        d = t.shape[2] // h  # Determine the depth per head
+        return t.reshape(b, n, h, d)
+
+    q = _reshape(q_in)
+    k = _reshape(k_in)
+    v = _reshape(v_in)
+
     del q_in, k_in, v_in
 
     dtype = q.dtype
@@ -497,7 +509,9 @@ def xformers_attention_forward(self, x, context=None, mask=None, **kwargs):
 
     out = out.to(dtype)
 
-    out = rearrange(out, 'b n h d -> b n (h d)', h=h)
+    # out = rearrange(out, 'b n h d -> b n (h d)', h=h)
+    b, n, h, d = out.shape
+    out = out.reshape(b, n, h * d)
     return self.to_out(out)
 
 

From 2a8a60c2c50473f0ece5804d4a2cde0d1ff3d35e Mon Sep 17 00:00:00 2001
From: huchenlei <chenlei.hu@mail.utoronto.ca>
Date: Thu, 16 May 2024 19:50:06 -0400
Subject: [PATCH 17/39] Add --precision half cmd option

---
 modules/cmd_args.py        |  2 +-
 modules/devices.py         | 24 ++++++++++++++++++++++++
 modules/sd_hijack_unet.py  | 29 ++++++++++++++++++++++-------
 modules/sd_hijack_utils.py | 26 +++++++++++++++-----------
 modules/sd_models.py       |  1 +
 modules/shared_init.py     |  8 ++++++++
 6 files changed, 71 insertions(+), 19 deletions(-)

diff --git a/modules/cmd_args.py b/modules/cmd_args.py
index 016a33d10..58c5e5d5b 100644
--- a/modules/cmd_args.py
+++ b/modules/cmd_args.py
@@ -41,7 +41,7 @@ parser.add_argument("--lowvram", action='store_true', help="enable stable diffus
 parser.add_argument("--lowram", action='store_true', help="load stable diffusion checkpoint weights to VRAM instead of RAM")
 parser.add_argument("--always-batch-cond-uncond", action='store_true', help="does not do anything")
 parser.add_argument("--unload-gfpgan", action='store_true', help="does not do anything.")
-parser.add_argument("--precision", type=str, help="evaluate at this precision", choices=["full", "autocast"], default="autocast")
+parser.add_argument("--precision", type=str, help="evaluate at this precision", choices=["full", "half", "autocast"], default="autocast")
 parser.add_argument("--upcast-sampling", action='store_true', help="upcast sampling. No effect with --no-half. Usually produces similar results to --no-half with better performance while using less memory.")
 parser.add_argument("--share", action='store_true', help="use share=True for gradio and make the UI accessible through their site")
 parser.add_argument("--ngrok", type=str, help="ngrok authtoken, alternative to gradio --share", default=None)
diff --git a/modules/devices.py b/modules/devices.py
index e4f671ac6..7de34ac51 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -114,6 +114,9 @@ errors.run(enable_tf32, "Enabling TF32")
 
 cpu: torch.device = torch.device("cpu")
 fp8: bool = False
+# Force fp16 for all models in inference. No casting during inference.
+# This flag is controlled by "--precision half" command line arg.
+force_fp16: bool = False
 device: torch.device = None
 device_interrogate: torch.device = None
 device_gfpgan: torch.device = None
@@ -127,6 +130,8 @@ unet_needs_upcast = False
 
 
 def cond_cast_unet(input):
+    if force_fp16:
+        return input.to(torch.float16)
     return input.to(dtype_unet) if unet_needs_upcast else input
 
 
@@ -206,6 +211,11 @@ def autocast(disable=False):
     if disable:
         return contextlib.nullcontext()
 
+    if force_fp16:
+        # No casting during inference if force_fp16 is enabled.
+        # All tensor dtype conversion happens before inference.
+        return contextlib.nullcontext()
+
     if fp8 and device==cpu:
         return torch.autocast("cpu", dtype=torch.bfloat16, enabled=True)
 
@@ -269,3 +279,17 @@ def first_time_calculation():
     x = torch.zeros((1, 1, 3, 3)).to(device, dtype)
     conv2d = torch.nn.Conv2d(1, 1, (3, 3)).to(device, dtype)
     conv2d(x)
+
+
+def force_model_fp16():
+    """
+    ldm and sgm have modules.diffusionmodules.util.GroupNorm32.forward, which
+    forces conversion of the input to float32. If force_fp16 is enabled, we need to
+    prevent this casting.
+    """
+    assert force_fp16
+    import sgm.modules.diffusionmodules.util as sgm_util
+    import ldm.modules.diffusionmodules.util as ldm_util
+    sgm_util.GroupNorm32 = torch.nn.GroupNorm
+    ldm_util.GroupNorm32 = torch.nn.GroupNorm
+    print("ldm/sgm GroupNorm32 replaced with normal torch.nn.GroupNorm due to `--precision half`.")
diff --git a/modules/sd_hijack_unet.py b/modules/sd_hijack_unet.py
index 2101f1a04..41955313a 100644
--- a/modules/sd_hijack_unet.py
+++ b/modules/sd_hijack_unet.py
@@ -36,7 +36,7 @@ th = TorchHijackForUnet()
 
 # Below are monkey patches to enable upcasting a float16 UNet for float32 sampling
 def apply_model(orig_func, self, x_noisy, t, cond, **kwargs):
-
+    """Always make sure inputs to unet are in correct dtype."""
     if isinstance(cond, dict):
         for y in cond.keys():
             if isinstance(cond[y], list):
@@ -45,7 +45,11 @@ def apply_model(orig_func, self, x_noisy, t, cond, **kwargs):
                 cond[y] = cond[y].to(devices.dtype_unet) if isinstance(cond[y], torch.Tensor) else cond[y]
 
     with devices.autocast():
-        return orig_func(self, x_noisy.to(devices.dtype_unet), t.to(devices.dtype_unet), cond, **kwargs).float()
+        result = orig_func(self, x_noisy.to(devices.dtype_unet), t.to(devices.dtype_unet), cond, **kwargs)
+        if devices.unet_needs_upcast:
+            return result.float()
+        else:
+            return result
 
 
 class GELUHijack(torch.nn.GELU, torch.nn.Module):
@@ -64,12 +68,11 @@ def hijack_ddpm_edit():
     if not ddpm_edit_hijack:
         CondFunc('modules.models.diffusion.ddpm_edit.LatentDiffusion.decode_first_stage', first_stage_sub, first_stage_cond)
         CondFunc('modules.models.diffusion.ddpm_edit.LatentDiffusion.encode_first_stage', first_stage_sub, first_stage_cond)
-        ddpm_edit_hijack = CondFunc('modules.models.diffusion.ddpm_edit.LatentDiffusion.apply_model', apply_model, unet_needs_upcast)
+        ddpm_edit_hijack = CondFunc('modules.models.diffusion.ddpm_edit.LatentDiffusion.apply_model', apply_model)
 
 
 unet_needs_upcast = lambda *args, **kwargs: devices.unet_needs_upcast
-CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.apply_model', apply_model, unet_needs_upcast)
-CondFunc('ldm.modules.diffusionmodules.openaimodel.timestep_embedding', lambda orig_func, timesteps, *args, **kwargs: orig_func(timesteps, *args, **kwargs).to(torch.float32 if timesteps.dtype == torch.int64 else devices.dtype_unet), unet_needs_upcast)
+
 if version.parse(torch.__version__) <= version.parse("1.13.2") or torch.cuda.is_available():
     CondFunc('ldm.modules.diffusionmodules.util.GroupNorm32.forward', lambda orig_func, self, *args, **kwargs: orig_func(self.float(), *args, **kwargs), unet_needs_upcast)
     CondFunc('ldm.modules.attention.GEGLU.forward', lambda orig_func, self, x: orig_func(self.float(), x.float()).to(devices.dtype_unet), unet_needs_upcast)
@@ -81,5 +84,17 @@ CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.decode_first_stage', first_s
 CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.encode_first_stage', first_stage_sub, first_stage_cond)
 CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.get_first_stage_encoding', lambda orig_func, *args, **kwargs: orig_func(*args, **kwargs).float(), first_stage_cond)
 
-CondFunc('sgm.modules.diffusionmodules.wrappers.OpenAIWrapper.forward', apply_model, unet_needs_upcast)
-CondFunc('sgm.modules.diffusionmodules.openaimodel.timestep_embedding', lambda orig_func, timesteps, *args, **kwargs: orig_func(timesteps, *args, **kwargs).to(torch.float32 if timesteps.dtype == torch.int64 else devices.dtype_unet), unet_needs_upcast)
+CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.apply_model', apply_model)
+CondFunc('sgm.modules.diffusionmodules.wrappers.OpenAIWrapper.forward', apply_model)
+
+
+def timestep_embedding_cast_result(orig_func, timesteps, *args, **kwargs):
+    if devices.unet_needs_upcast and timesteps.dtype == torch.int64:
+        dtype = torch.float32
+    else:
+        dtype = devices.dtype_unet
+    return orig_func(timesteps, *args, **kwargs).to(dtype=dtype)
+
+
+CondFunc('ldm.modules.diffusionmodules.openaimodel.timestep_embedding', timestep_embedding_cast_result)
+CondFunc('sgm.modules.diffusionmodules.openaimodel.timestep_embedding', timestep_embedding_cast_result)
diff --git a/modules/sd_hijack_utils.py b/modules/sd_hijack_utils.py
index 79bf6e468..546f2eda4 100644
--- a/modules/sd_hijack_utils.py
+++ b/modules/sd_hijack_utils.py
@@ -1,7 +1,11 @@
 import importlib
 
+
+always_true_func = lambda *args, **kwargs: True
+
+
 class CondFunc:
-    def __new__(cls, orig_func, sub_func, cond_func):
+    def __new__(cls, orig_func, sub_func, cond_func=always_true_func):
         self = super(CondFunc, cls).__new__(cls)
         if isinstance(orig_func, str):
             func_path = orig_func.split('.')
@@ -20,13 +24,13 @@ class CondFunc:
                 print(f"Warning: Failed to resolve {orig_func} for CondFunc hijack")
                 pass
         self.__init__(orig_func, sub_func, cond_func)
-        return lambda *args, **kwargs: self(*args, **kwargs)
-    def __init__(self, orig_func, sub_func, cond_func):
-        self.__orig_func = orig_func
-        self.__sub_func = sub_func
-        self.__cond_func = cond_func
-    def __call__(self, *args, **kwargs):
-        if not self.__cond_func or self.__cond_func(self.__orig_func, *args, **kwargs):
-            return self.__sub_func(self.__orig_func, *args, **kwargs)
-        else:
-            return self.__orig_func(*args, **kwargs)
+        return lambda *args, **kwargs: self(*args, **kwargs)
+    def __init__(self, orig_func, sub_func, cond_func):
+        self.__orig_func = orig_func
+        self.__sub_func = sub_func
+        self.__cond_func = cond_func
+    def __call__(self, *args, **kwargs):
+        if not self.__cond_func or self.__cond_func(self.__orig_func, *args, **kwargs):
+            return self.__sub_func(self.__orig_func, *args, **kwargs)
+        else:
+            return self.__orig_func(*args, **kwargs)
diff --git a/modules/sd_models.py b/modules/sd_models.py
index ff245b7a6..9c5909168 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -403,6 +403,7 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer
         model.float()
         model.alphas_cumprod_original = model.alphas_cumprod
         devices.dtype_unet = torch.float32
+        assert shared.cmd_opts.precision != "half", "Cannot use --precision half with --no-half"
         timer.record("apply float()")
     else:
         vae = model.first_stage_model
diff --git a/modules/shared_init.py b/modules/shared_init.py
index 935e3a21c..a6ad0433d 100644
--- a/modules/shared_init.py
+++ b/modules/shared_init.py
@@ -31,6 +31,14 @@ def initialize():
     devices.dtype_vae = torch.float32 if cmd_opts.no_half or cmd_opts.no_half_vae else torch.float16
     devices.dtype_inference = torch.float32 if cmd_opts.precision == 'full' else devices.dtype
 
+    if cmd_opts.precision == "half":
+        msg = "--no-half and --no-half-vae conflict with --precision half"
+        assert devices.dtype == torch.float16, msg
+        assert devices.dtype_vae == torch.float16, msg
+        assert devices.dtype_inference == torch.float16, msg
+        devices.force_fp16 = True
+        devices.force_model_fp16()
+
     shared.device = devices.device
     shared.weight_load_location = None if cmd_opts.lowram else "cpu"
 

From 53d67088ee0fb190c3ae1330c2b876dedb16dd8b Mon Sep 17 00:00:00 2001
From: drhead <1313496+drhead@users.noreply.github.com>
Date: Fri, 17 May 2024 12:12:57 -0400
Subject: [PATCH 18/39] Patch timestep embedding to create tensor on-device

---
 modules/sd_hijack_unet.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/modules/sd_hijack_unet.py b/modules/sd_hijack_unet.py
index 2101f1a04..0dabbe0e4 100644
--- a/modules/sd_hijack_unet.py
+++ b/modules/sd_hijack_unet.py
@@ -1,5 +1,7 @@
 import torch
 from packaging import version
+from einops import repeat
+import math
 
 from modules import devices
 from modules.sd_hijack_utils import CondFunc
@@ -48,6 +50,30 @@ def apply_model(orig_func, self, x_noisy, t, cond, **kwargs):
         return orig_func(self, x_noisy.to(devices.dtype_unet), t.to(devices.dtype_unet), cond, **kwargs).float()
 
 
+# Monkey patch to create timestep embed tensor on device, avoiding a block.
+def timestep_embedding(_, timesteps, dim, max_period=10000, repeat_only=False):
+    """
+    Create sinusoidal timestep embeddings.
+    :param timesteps: a 1-D Tensor of N indices, one per batch element.
+                      These may be fractional.
+    :param dim: the dimension of the output.
+    :param max_period: controls the minimum frequency of the embeddings.
+    :return: an [N x dim] Tensor of positional embeddings.
+    """
+    if not repeat_only:
+        half = dim // 2
+        freqs = torch.exp(
+            -math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32, device=timesteps.device) / half
+        )
+        args = timesteps[:, None].float() * freqs[None]
+        embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1)
+        if dim % 2:
+            embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1)
+    else:
+        embedding = repeat(timesteps, 'b -> b d', d=dim)
+    return embedding
+
+
 class GELUHijack(torch.nn.GELU, torch.nn.Module):
     def __init__(self, *args, **kwargs):
         torch.nn.GELU.__init__(self, *args, **kwargs)
@@ -69,6 +95,7 @@ def hijack_ddpm_edit():
 
 unet_needs_upcast = lambda *args, **kwargs: devices.unet_needs_upcast
 CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.apply_model', apply_model, unet_needs_upcast)
+CondFunc('ldm.modules.diffusionmodules.openaimodel.timestep_embedding', timestep_embedding)
 CondFunc('ldm.modules.diffusionmodules.openaimodel.timestep_embedding', lambda orig_func, timesteps, *args, **kwargs: orig_func(timesteps, *args, **kwargs).to(torch.float32 if timesteps.dtype == torch.int64 else devices.dtype_unet), unet_needs_upcast)
 if version.parse(torch.__version__) <= version.parse("1.13.2") or torch.cuda.is_available():
     CondFunc('ldm.modules.diffusionmodules.util.GroupNorm32.forward', lambda orig_func, self, *args, **kwargs: orig_func(self.float(), *args, **kwargs), unet_needs_upcast)

From cc9ca67664ef72931af9a4dced88a8434c5d4f16 Mon Sep 17 00:00:00 2001
From: drhead <1313496+drhead@users.noreply.github.com>
Date: Fri, 17 May 2024 13:14:26 -0400
Subject: [PATCH 19/39] Add transformer forward patch

---
 modules/sd_hijack_unet.py | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/modules/sd_hijack_unet.py b/modules/sd_hijack_unet.py
index 0dabbe0e4..c680367eb 100644
--- a/modules/sd_hijack_unet.py
+++ b/modules/sd_hijack_unet.py
@@ -74,6 +74,30 @@ def timestep_embedding(_, timesteps, dim, max_period=10000, repeat_only=False):
     return embedding
 
 
+# Monkey patch to SpatialTransformer removing unnecessary contiguous calls.
+# Prevents a lot of unnecessary aten::copy_ calls
+def spatial_transformer_forward(_, self, x: torch.Tensor, context=None):
+    # note: if no context is given, cross-attention defaults to self-attention
+    if not isinstance(context, list):
+        context = [context]
+    b, c, h, w = x.shape
+    x_in = x
+    x = self.norm(x)
+    if not self.use_linear:
+        x = self.proj_in(x)
+    x = x.permute(0, 2, 3, 1).reshape(b, h * w, c)
+    if self.use_linear:
+        x = self.proj_in(x)
+    for i, block in enumerate(self.transformer_blocks):
+        x = block(x, context=context[i])
+    if self.use_linear:
+        x = self.proj_out(x)
+    x = x.view(b, h, w, c).permute(0, 3, 1, 2)
+    if not self.use_linear:
+        x = self.proj_out(x)
+    return x + x_in
+
+
 class GELUHijack(torch.nn.GELU, torch.nn.Module):
     def __init__(self, *args, **kwargs):
         torch.nn.GELU.__init__(self, *args, **kwargs)
@@ -95,7 +119,8 @@ def hijack_ddpm_edit():
 
 unet_needs_upcast = lambda *args, **kwargs: devices.unet_needs_upcast
 CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.apply_model', apply_model, unet_needs_upcast)
-CondFunc('ldm.modules.diffusionmodules.openaimodel.timestep_embedding', timestep_embedding)
+CondFunc('ldm.modules.diffusionmodules.openaimodel.timestep_embedding', timestep_embedding, lambda *args, **kwargs: True)
+CondFunc('ldm.modules.attention.SpatialTransformer.forward', spatial_transformer_forward, lambda *args, **kwargs: True)
 CondFunc('ldm.modules.diffusionmodules.openaimodel.timestep_embedding', lambda orig_func, timesteps, *args, **kwargs: orig_func(timesteps, *args, **kwargs).to(torch.float32 if timesteps.dtype == torch.int64 else devices.dtype_unet), unet_needs_upcast)
 if version.parse(torch.__version__) <= version.parse("1.13.2") or torch.cuda.is_available():
     CondFunc('ldm.modules.diffusionmodules.util.GroupNorm32.forward', lambda orig_func, self, *args, **kwargs: orig_func(self.float(), *args, **kwargs), unet_needs_upcast)

From dca9007ac7a9852752d91d34d2ed1feaef6a03f2 Mon Sep 17 00:00:00 2001
From: huchenlei <chenlei.hu@mail.utoronto.ca>
Date: Fri, 17 May 2024 13:23:12 -0400
Subject: [PATCH 20/39] Fix SD15 dtype

---
 modules/sd_models.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/modules/sd_models.py b/modules/sd_models.py
index 9c5909168..7d4ab0fd8 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -733,6 +733,10 @@ def load_model(checkpoint_info=None, already_loaded_state_dict=None):
             sd_model = instantiate_from_config(sd_config.model)
 
     sd_model.used_config = checkpoint_config
+    # ldm's Unet is using self.dtype to cast input tensor. If we do not overwrite
+    # UnetModel.dtype, it will be the default dtype from config.
+    # sgm's Unet is not using dtype for casting. The value will be ignored.
+    sd_model.model.diffusion_model.dtype = devices.dtype_unet
 
     timer.record("create model")
 

From b57a70f37322142939f7429f287599e027108bfc Mon Sep 17 00:00:00 2001
From: huchenlei <chenlei.hu@mail.utoronto.ca>
Date: Fri, 17 May 2024 13:34:04 -0400
Subject: [PATCH 21/39] Proper fix of SD15 dtype

---
 modules/sd_models.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/modules/sd_models.py b/modules/sd_models.py
index 7d4ab0fd8..26a5127cd 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -541,7 +541,7 @@ def repair_config(sd_config):
     if hasattr(sd_config.model.params, 'unet_config'):
         if shared.cmd_opts.no_half:
             sd_config.model.params.unet_config.params.use_fp16 = False
-        elif shared.cmd_opts.upcast_sampling:
+        elif shared.cmd_opts.upcast_sampling or shared.cmd_opts.precision == "half":
             sd_config.model.params.unet_config.params.use_fp16 = True
 
     if getattr(sd_config.model.params.first_stage_config.params.ddconfig, "attn_type", None) == "vanilla-xformers" and not shared.xformers_available:
@@ -733,10 +733,6 @@ def load_model(checkpoint_info=None, already_loaded_state_dict=None):
             sd_model = instantiate_from_config(sd_config.model)
 
     sd_model.used_config = checkpoint_config
-    # ldm's Unet is using self.dtype to cast input tensor. If we do not overwrite
-    # UnetModel.dtype, it will be the default dtype from config.
-    # sgm's Unet is not using dtype for casting. The value will be ignored.
-    sd_model.model.diffusion_model.dtype = devices.dtype_unet
 
     timer.record("create model")
 

From 1d7448281751ea3223c681a82de8219a6fbe1d22 Mon Sep 17 00:00:00 2001
From: Logan <loganbooker@gmail.com>
Date: Sat, 18 May 2024 09:09:57 +1000
Subject: [PATCH 22/39] Default device for sigma tensor to CPU

* Consistent with implementations in k-diffusion.
* Makes this compatible with https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15823
---
 modules/sd_schedulers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/sd_schedulers.py b/modules/sd_schedulers.py
index 0ac1f7a21..4ddb77850 100644
--- a/modules/sd_schedulers.py
+++ b/modules/sd_schedulers.py
@@ -33,7 +33,7 @@ def sgm_uniform(n, sigma_min, sigma_max, inner_model, device):
     sigs += [0.0]
     return torch.FloatTensor(sigs).to(device)
 
-def get_align_your_steps_sigmas(n, sigma_min, sigma_max, device):
+def get_align_your_steps_sigmas(n, sigma_min, sigma_max, device='cpu'):
     # https://research.nvidia.com/labs/toronto-ai/AlignYourSteps/howto.html
     def loglinear_interp(t_steps, num_steps):
         """

From feeb6802aa71fad190da2e051e50af84a94eda85 Mon Sep 17 00:00:00 2001
From: drhead <1313496+drhead@users.noreply.github.com>
Date: Sat, 18 May 2024 01:22:31 -0400
Subject: [PATCH 23/39] fix case where first step skipped if skip early cond is
 0

---
 modules/sd_samplers_cfg_denoiser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py
index 082a4f63c..d89ea2c8b 100644
--- a/modules/sd_samplers_cfg_denoiser.py
+++ b/modules/sd_samplers_cfg_denoiser.py
@@ -212,7 +212,7 @@ class CFGDenoiser(torch.nn.Module):
         uncond = denoiser_params.text_uncond
         skip_uncond = False
 
-        if self.step / self.total_steps <= shared.opts.skip_early_cond:
+        if shared.opts.skip_early_cond != 0. and self.step / self.total_steps <= shared.opts.skip_early_cond:
             skip_uncond = True
             x_in = x_in[:-batch_size]
             sigma_in = sigma_in[:-batch_size]

From 82884da18c8f183c4ce0e7237953303f26610370 Mon Sep 17 00:00:00 2001
From: w-e-w <40751091+w-e-w@users.noreply.github.com>
Date: Sun, 19 May 2024 04:55:45 +0900
Subject: [PATCH 24/39] use apply_override for Clip skip

---
 scripts/xyz_grid.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py
index b9fd66fe5..c7cb51333 100644
--- a/scripts/xyz_grid.py
+++ b/scripts/xyz_grid.py
@@ -264,7 +264,7 @@ axis_options = [
     AxisOption("Schedule max sigma", float, apply_override("sigma_max")),
     AxisOption("Schedule rho", float, apply_override("rho")),
     AxisOption("Eta", float, apply_field("eta")),
-    AxisOption("Clip skip", int, apply_clip_skip),
+    AxisOption("Clip skip", int, apply_override('CLIP_stop_at_last_layers')),
     AxisOption("Denoising", float, apply_field("denoising_strength")),
     AxisOption("Initial noise multiplier", float, apply_field("initial_noise_multiplier")),
     AxisOption("Extra noise", float, apply_override("img2img_extra_noise")),
@@ -399,7 +399,6 @@ def draw_xyz_grid(p, xs, ys, zs, x_labels, y_labels, z_labels, cell, draw_legend
 
 class SharedSettingsStackHelper(object):
     def __enter__(self):
-        self.CLIP_stop_at_last_layers = opts.CLIP_stop_at_last_layers
         self.vae = opts.sd_vae
         self.uni_pc_order = opts.uni_pc_order
 
@@ -409,8 +408,6 @@ class SharedSettingsStackHelper(object):
         modules.sd_models.reload_model_weights()
         modules.sd_vae.reload_vae_weights()
 
-        opts.data["CLIP_stop_at_last_layers"] = self.CLIP_stop_at_last_layers
-
 
 re_range = re.compile(r"\s*([+-]?\s*\d+)\s*-\s*([+-]?\s*\d+)(?:\s*\(([+-]\d+)\s*\))?\s*")
 re_range_float = re.compile(r"\s*([+-]?\s*\d+(?:.\d*)?)\s*-\s*([+-]?\s*\d+(?:.\d*)?)(?:\s*\(([+-]\d+(?:.\d*)?)\s*\))?\s*")

From 1f392517f8938e0082e189fa0c28f4eb89fb0eb2 Mon Sep 17 00:00:00 2001
From: w-e-w <40751091+w-e-w@users.noreply.github.com>
Date: Sun, 19 May 2024 04:59:05 +0900
Subject: [PATCH 25/39] use override for uni_pc_order

---
 scripts/xyz_grid.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py
index c7cb51333..622cc43c3 100644
--- a/scripts/xyz_grid.py
+++ b/scripts/xyz_grid.py
@@ -140,7 +140,7 @@ def apply_styles(p: StableDiffusionProcessingTxt2Img, x: str, _):
 
 
 def apply_uni_pc_order(p, x, xs):
-    opts.data["uni_pc_order"] = min(x, p.steps - 1)
+    p.override_settings['uni_pc_order'] = min(x, p.steps - 1)
 
 
 def apply_face_restore(p, opt, x):
@@ -400,11 +400,9 @@ def draw_xyz_grid(p, xs, ys, zs, x_labels, y_labels, z_labels, cell, draw_legend
 class SharedSettingsStackHelper(object):
     def __enter__(self):
         self.vae = opts.sd_vae
-        self.uni_pc_order = opts.uni_pc_order
 
     def __exit__(self, exc_type, exc_value, tb):
         opts.data["sd_vae"] = self.vae
-        opts.data["uni_pc_order"] = self.uni_pc_order
         modules.sd_models.reload_model_weights()
         modules.sd_vae.reload_vae_weights()
 

From 1e696b028adbd449df8c30ed760103b120ec5546 Mon Sep 17 00:00:00 2001
From: w-e-w <40751091+w-e-w@users.noreply.github.com>
Date: Sun, 19 May 2024 05:14:32 +0900
Subject: [PATCH 26/39] use override of sd_vae

---
 scripts/xyz_grid.py | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py
index 622cc43c3..4c83e92b2 100644
--- a/scripts/xyz_grid.py
+++ b/scripts/xyz_grid.py
@@ -118,21 +118,16 @@ def apply_size(p, x: str, xs) -> None:
 
 
 def find_vae(name: str):
-    if name.lower() in ['auto', 'automatic']:
-        return modules.sd_vae.unspecified
-    if name.lower() == 'none':
-        return None
-    else:
-        choices = [x for x in sorted(modules.sd_vae.vae_dict, key=lambda x: len(x)) if name.lower().strip() in x.lower()]
-        if len(choices) == 0:
-            print(f"No VAE found for {name}; using automatic")
-            return modules.sd_vae.unspecified
-        else:
-            return modules.sd_vae.vae_dict[choices[0]]
+    name = name.lower().strip()
+    if name in ('auto', 'automatic', 'none'):
+        return 'None' if name == 'none' else 'Automatic'
+    # resolve lazily: the fallback message must only print when no VAE name matches
+    found = next((k for k in modules.sd_vae.vae_dict if k.lower() == name), None)
+    return found or print(f'No VAE found for {name}; using Automatic') or 'Automatic'
 
 
 def apply_vae(p, x, xs):
-    modules.sd_vae.reload_vae_weights(shared.sd_model, vae_file=find_vae(x))
+    p.override_settings['sd_vae'] = find_vae(x)
 
 
 def apply_styles(p: StableDiffusionProcessingTxt2Img, x: str, _):
@@ -270,7 +265,7 @@ axis_options = [
     AxisOption("Extra noise", float, apply_override("img2img_extra_noise")),
     AxisOptionTxt2Img("Hires upscaler", str, apply_field("hr_upscaler"), choices=lambda: [*shared.latent_upscale_modes, *[x.name for x in shared.sd_upscalers]]),
     AxisOptionImg2Img("Cond. Image Mask Weight", float, apply_field("inpainting_mask_weight")),
-    AxisOption("VAE", str, apply_vae, cost=0.7, choices=lambda: ['None'] + list(sd_vae.vae_dict)),
+    AxisOption("VAE", str, apply_vae, cost=0.7, choices=lambda: ['Automatic', 'None'] + list(sd_vae.vae_dict)),
     AxisOption("Styles", str, apply_styles, choices=lambda: list(shared.prompt_styles.styles)),
     AxisOption("UniPC Order", int, apply_uni_pc_order, cost=0.5),
     AxisOption("Face restore", str, apply_face_restore, format_value=format_value),
@@ -399,10 +394,9 @@ def draw_xyz_grid(p, xs, ys, zs, x_labels, y_labels, z_labels, cell, draw_legend
 
 class SharedSettingsStackHelper(object):
     def __enter__(self):
-        self.vae = opts.sd_vae
+        pass
 
     def __exit__(self, exc_type, exc_value, tb):
-        opts.data["sd_vae"] = self.vae
         modules.sd_models.reload_model_weights()
         modules.sd_vae.reload_vae_weights()
 

From 51e7122f25c276b258a8f55a64e60e5b2265287f Mon Sep 17 00:00:00 2001
From: w-e-w <40751091+w-e-w@users.noreply.github.com>
Date: Sun, 19 May 2024 05:17:44 +0900
Subject: [PATCH 27/39] remove unused code

---
 scripts/xyz_grid.py | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py
index 4c83e92b2..23dafd477 100644
--- a/scripts/xyz_grid.py
+++ b/scripts/xyz_grid.py
@@ -95,17 +95,6 @@ def confirm_checkpoints_or_none(p, xs):
             raise RuntimeError(f"Unknown checkpoint: {x}")
 
 
-def apply_clip_skip(p, x, xs):
-    opts.data["CLIP_stop_at_last_layers"] = x
-
-
-def apply_upscale_latent_space(p, x, xs):
-    if x.lower().strip() != '0':
-        opts.data["use_scale_latent_for_hires_fix"] = True
-    else:
-        opts.data["use_scale_latent_for_hires_fix"] = False
-
-
 def apply_size(p, x: str, xs) -> None:
     try:
         width, _, height = x.partition('x')

From 5867be2914c303c2f8ba86ff23dba4b31aeafa79 Mon Sep 17 00:00:00 2001
From: viking1304 <viking1304@gmail.com>
Date: Mon, 20 May 2024 23:44:17 +0200
Subject: [PATCH 28/39] Use different torch versions for Intel and ARM Macs

---
 webui-macos-env.sh | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/webui-macos-env.sh b/webui-macos-env.sh
index db7e8b1a0..ad0736378 100644
--- a/webui-macos-env.sh
+++ b/webui-macos-env.sh
@@ -11,7 +11,12 @@ fi
 
 export install_dir="$HOME"
 export COMMANDLINE_ARGS="--skip-torch-cuda-test --upcast-sampling --no-half-vae --use-cpu interrogate"
-export TORCH_COMMAND="pip install torch==2.1.0 torchvision==0.16.0"
 export PYTORCH_ENABLE_MPS_FALLBACK=1
 
+if [[ "$(sysctl -n machdep.cpu.brand_string)" =~ ^.*"Intel".*$ ]]; then
+    export TORCH_COMMAND="pip install torch==2.1.2 torchvision==0.16.2"
+else
+    export TORCH_COMMAND="pip install torch==2.3.0 torchvision==0.18.0"
+fi
+
 ####################################################################

From 344eda55d4550e91b1a3e95f8e669084a74c876f Mon Sep 17 00:00:00 2001
From: w-e-w <40751091+w-e-w@users.noreply.github.com>
Date: Wed, 22 May 2024 23:06:07 +0900
Subject: [PATCH 29/39] ReloadUI backgroundColor --background-fill-primary

---
 javascript/ui.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/javascript/ui.js b/javascript/ui.js
index e0f5feebd..16faacebb 100644
--- a/javascript/ui.js
+++ b/javascript/ui.js
@@ -337,8 +337,8 @@ onOptionsChanged(function() {
 let txt2img_textarea, img2img_textarea = undefined;
 
 function restart_reload() {
+    document.body.style.backgroundColor = "var(--background-fill-primary)";
     document.body.innerHTML = '<h1 style="font-family:monospace;margin-top:20%;color:lightgray;text-align:center;">Reloading...</h1>';
-
     var requestPing = function() {
         requestGet("./internal/ping", {}, function(data) {
             location.reload();

From a63946233b71083f6726006b96fc16e3033ab844 Mon Sep 17 00:00:00 2001
From: w-e-w <40751091+w-e-w@users.noreply.github.com>
Date: Sat, 25 May 2024 14:18:05 +0900
Subject: [PATCH 30/39] setuptools==69.5.1

---
 requirements_versions.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements_versions.txt b/requirements_versions.txt
index 3df74f3d6..3037a395b 100644
--- a/requirements_versions.txt
+++ b/requirements_versions.txt
@@ -1,3 +1,4 @@
+setuptools==69.5.1  # temp fix for compatibility with some old packages
 GitPython==3.1.32
 Pillow==9.5.0
 accelerate==0.21.0

From 6dd53ce63dc70b3fcf7f25402d40b48f50abdf74 Mon Sep 17 00:00:00 2001
From: alcacode <alca_addr@protonmail.com>
Date: Sun, 26 May 2024 15:36:55 +0200
Subject: [PATCH 31/39] Fix bug where file extension had an extra '.' under
 some circumstances

Fix bug where under some circumstances an extra "." was inserted between the file base name and the file extension.
The bug is triggered when the extension argument is one of "jpg", "jpeg", or "webp", and the image exceeds the format's dimension limit. Then the extension variable is set to ".png", causing the fullfn variable to evaluate to a string ending with "..png".
---
 modules/images.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/images.py b/modules/images.py
index c0ff8a630..1be176cdf 100644
--- a/modules/images.py
+++ b/modules/images.py
@@ -653,7 +653,7 @@ def save_image(image, path, basename, seed=None, prompt=None, extension='png', i
     # WebP and JPG formats have maximum dimension limits of 16383 and 65535 respectively. switch to PNG which has a much higher limit
     if (image.height > 65535 or image.width > 65535) and extension.lower() in ("jpg", "jpeg") or (image.height > 16383 or image.width > 16383) and extension.lower() == "webp":
         print('Image dimensions too large; saving as PNG')
-        extension = ".png"
+        extension = "png"
 
     if save_to_dirs is None:
         save_to_dirs = (grid and opts.grid_save_to_dirs) or (not grid and opts.save_to_dirs and not no_prompt)

From 801b72b92b4f07e5d2fa9737b160762ea8f67088 Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Tue, 28 May 2024 21:20:23 +0300
Subject: [PATCH 32/39] update changelog

---
 CHANGELOG.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 295d26c8c..5c16b5611 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,8 @@
+## 1.9.4
+
+### Bug Fixes:
+*  pin setuptools version to fix the startup error ([#15883](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15883)) 
+
 ## 1.9.3
 
 ### Bug Fixes:

From 8d6f7417385d1cacfd827800bdf02a0e8dd8f092 Mon Sep 17 00:00:00 2001
From: w-e-w <40751091+w-e-w@users.noreply.github.com>
Date: Wed, 29 May 2024 03:33:32 +0900
Subject: [PATCH 33/39] #15883 -> #15882

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5c16b5611..596b1ec45 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,7 +1,7 @@
 ## 1.9.4
 
 ### Bug Fixes:
-*  pin setuptools version to fix the startup error ([#15883](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15883)) 
+*  pin setuptools version to fix the startup error ([#15882](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/15882)) 
 
 ## 1.9.3
 

From 10f8d0f84216e3642e960ea7118a5acc8a79546f Mon Sep 17 00:00:00 2001
From: eatmoreapple <eatmoreorange@gmail.com>
Date: Tue, 4 Jun 2024 15:02:13 +0800
Subject: [PATCH 34/39] feat: lora partial update precede full update.

---
 extensions-builtin/Lora/networks.py | 40 +++++++++++++++++++++--------
 1 file changed, 30 insertions(+), 10 deletions(-)

diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py
index 42b14dc23..18809364b 100644
--- a/extensions-builtin/Lora/networks.py
+++ b/extensions-builtin/Lora/networks.py
@@ -260,6 +260,16 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No
 
     loaded_networks.clear()
 
+    unavailable_networks = []
+    for name in names:
+        if name.lower() in forbidden_network_aliases and available_networks.get(name) is None:
+            unavailable_networks.append(name)
+        elif available_network_aliases.get(name) is None:
+            unavailable_networks.append(name)
+
+    if unavailable_networks:
+        update_available_networks_by_names(unavailable_networks)
+
     networks_on_disk = [available_networks.get(name, None) if name.lower() in forbidden_network_aliases else available_network_aliases.get(name, None) for name in names]
     if any(x is None for x in networks_on_disk):
         list_available_networks()
@@ -566,22 +576,16 @@ def network_MultiheadAttention_load_state_dict(self, *args, **kwargs):
     return originals.MultiheadAttention_load_state_dict(self, *args, **kwargs)
 
 
-def list_available_networks():
-    available_networks.clear()
-    available_network_aliases.clear()
-    forbidden_network_aliases.clear()
-    available_network_hash_lookup.clear()
-    forbidden_network_aliases.update({"none": 1, "Addams": 1})
-
-    os.makedirs(shared.cmd_opts.lora_dir, exist_ok=True)
-
+def process_network_files(names: list[str] | None = None):
     candidates = list(shared.walk_files(shared.cmd_opts.lora_dir, allowed_extensions=[".pt", ".ckpt", ".safetensors"]))
     candidates += list(shared.walk_files(shared.cmd_opts.lyco_dir_backcompat, allowed_extensions=[".pt", ".ckpt", ".safetensors"]))
     for filename in candidates:
         if os.path.isdir(filename):
             continue
-
         name = os.path.splitext(os.path.basename(filename))[0]
+        # if names is provided, only load networks with names in the list
+        if names and name not in names:
+            continue
         try:
             entry = network.NetworkOnDisk(name, filename)
         except OSError:  # should catch FileNotFoundError and PermissionError etc.
@@ -597,6 +601,22 @@ def list_available_networks():
         available_network_aliases[entry.alias] = entry
 
 
+def update_available_networks_by_names(names: list[str]):
+    process_network_files(names)
+
+
+def list_available_networks():
+    available_networks.clear()
+    available_network_aliases.clear()
+    forbidden_network_aliases.clear()
+    available_network_hash_lookup.clear()
+    forbidden_network_aliases.update({"none": 1, "Addams": 1})
+
+    os.makedirs(shared.cmd_opts.lora_dir, exist_ok=True)
+
+    process_network_files()
+
+
 re_network_name = re.compile(r"(.*)\s*\([0-9a-fA-F]+\)")
 
 

From 25bbf31f5701b85804908a54b2f6af38a1d50f1f Mon Sep 17 00:00:00 2001
From: NouberNou <korewananda@gmail.com>
Date: Thu, 6 Jun 2024 16:22:49 -0700
Subject: [PATCH 35/39] Fix for grids without comprehensive infotexts

When generating grids, some scripts such as img2img loopback and ultimate SD upscale do not pass infotexts for each image since they are the same prompt.

If you attempt to save those images using the save button in the UI, it will fail because it looks up the infotext of the selected image. This fixes those errors by replicating the infotext once for each image passed into the image list when the infotexts parameter is None.
---
 modules/processing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/processing.py b/modules/processing.py
index 76557dd7f..cb37a77df 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -569,7 +569,7 @@ class Processed:
         self.all_negative_prompts = all_negative_prompts or p.all_negative_prompts or [self.negative_prompt]
         self.all_seeds = all_seeds or p.all_seeds or [self.seed]
         self.all_subseeds = all_subseeds or p.all_subseeds or [self.subseed]
-        self.infotexts = infotexts or [info]
+        self.infotexts = infotexts or [info] * len(image_list)
         self.version = program_version()
 
     def js(self):

From 53f62674ae55e84aff4d4c9ed104ba9dce8ae887 Mon Sep 17 00:00:00 2001
From: NouberNou <korewananda@gmail.com>
Date: Thu, 6 Jun 2024 16:30:01 -0700
Subject: [PATCH 36/39] Typo on edit

Made the fix in the GitHub web editor and mistyped it when copying from my local copy
---
 modules/processing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/processing.py b/modules/processing.py
index cb37a77df..c22da4169 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -569,7 +569,7 @@ class Processed:
         self.all_negative_prompts = all_negative_prompts or p.all_negative_prompts or [self.negative_prompt]
         self.all_seeds = all_seeds or p.all_seeds or [self.seed]
         self.all_subseeds = all_subseeds or p.all_subseeds or [self.subseed]
-        self.infotexts = infotexts or [info] * len(image_list)
+        self.infotexts = infotexts or [info] * len(images_list)
         self.version = program_version()
 
     def js(self):

From 0769aa318a1896ccf74f57e6e943eb6b5fab5051 Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Sat, 8 Jun 2024 09:05:35 +0300
Subject: [PATCH 37/39] integrated edits as recommended in the PR #15804

---
 modules/sd_hijack_optimizations.py | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)

diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py
index 4c2dc56d4..0269f1f5b 100644
--- a/modules/sd_hijack_optimizations.py
+++ b/modules/sd_hijack_optimizations.py
@@ -486,18 +486,7 @@ def xformers_attention_forward(self, x, context=None, mask=None, **kwargs):
     k_in = self.to_k(context_k)
     v_in = self.to_v(context_v)
 
-    def _reshape(t):
-        """rearrange(t, 'b n (h d) -> b n h d', h=h).
-        Using torch native operations to avoid overhead as this function is
-        called frequently. (70 times/it for SDXL)
-        """
-        b, n, _ = t.shape  # Get the batch size (b) and sequence length (n)
-        d = t.shape[2] // h  # Determine the depth per head
-        return t.reshape(b, n, h, d)
-
-    q = _reshape(q_in)
-    k = _reshape(k_in)
-    v = _reshape(v_in)
+    q, k, v = (t.reshape(t.shape[0], t.shape[1], h, -1) for t in (q_in, k_in, v_in))
 
     del q_in, k_in, v_in
 
@@ -509,7 +498,6 @@ def xformers_attention_forward(self, x, context=None, mask=None, **kwargs):
 
     out = out.to(dtype)
 
-    # out = rearrange(out, 'b n h d -> b n (h d)', h=h)
     b, n, h, d = out.shape
     out = out.reshape(b, n, h * d)
     return self.to_out(out)

From 5429e4cff514df2f4cab242212ba347741eadc08 Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Sat, 8 Jun 2024 09:56:09 +0300
Subject: [PATCH 38/39] add proper infotext support for #15607; fix settings
 override not working for NGMS, s_churn, etc...

---
 modules/processing.py               | 14 ++++++++------
 modules/sd_samplers_cfg_denoiser.py | 12 +++++++-----
 modules/shared_options.py           |  6 +++---
 3 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/modules/processing.py b/modules/processing.py
index c22da4169..97a7162aa 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -238,11 +238,6 @@ class StableDiffusionProcessing:
             self.styles = []
 
         self.sampler_noise_scheduler_override = None
-        self.s_min_uncond = self.s_min_uncond if self.s_min_uncond is not None else opts.s_min_uncond
-        self.s_churn = self.s_churn if self.s_churn is not None else opts.s_churn
-        self.s_tmin = self.s_tmin if self.s_tmin is not None else opts.s_tmin
-        self.s_tmax = (self.s_tmax if self.s_tmax is not None else opts.s_tmax) or float('inf')
-        self.s_noise = self.s_noise if self.s_noise is not None else opts.s_noise
 
         self.extra_generation_params = self.extra_generation_params or {}
         self.override_settings = self.override_settings or {}
@@ -259,6 +254,13 @@ class StableDiffusionProcessing:
         self.cached_uc = StableDiffusionProcessing.cached_uc
         self.cached_c = StableDiffusionProcessing.cached_c
 
+    def fill_fields_from_opts(self):
+        self.s_min_uncond = self.s_min_uncond if self.s_min_uncond is not None else opts.s_min_uncond
+        self.s_churn = self.s_churn if self.s_churn is not None else opts.s_churn
+        self.s_tmin = self.s_tmin if self.s_tmin is not None else opts.s_tmin
+        self.s_tmax = (self.s_tmax if self.s_tmax is not None else opts.s_tmax) or float('inf')
+        self.s_noise = self.s_noise if self.s_noise is not None else opts.s_noise
+
     @property
     def sd_model(self):
         return shared.sd_model
@@ -794,7 +796,6 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments=None, iter
         "Token merging ratio hr": None if not enable_hr or token_merging_ratio_hr == 0 else token_merging_ratio_hr,
         "Init image hash": getattr(p, 'init_img_hash', None),
         "RNG": opts.randn_source if opts.randn_source != "GPU" else None,
-        "NGMS": None if p.s_min_uncond == 0 else p.s_min_uncond,
         "Tiling": "True" if p.tiling else None,
         **p.extra_generation_params,
         "Version": program_version() if opts.add_version_to_infotext else None,
@@ -890,6 +891,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
     modules.sd_hijack.model_hijack.apply_circular(p.tiling)
     modules.sd_hijack.model_hijack.clear_comments()
 
+    p.fill_fields_from_opts()
     p.setup_prompts()
 
     if isinstance(seed, list):
diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py
index d89ea2c8b..f48f58a50 100644
--- a/modules/sd_samplers_cfg_denoiser.py
+++ b/modules/sd_samplers_cfg_denoiser.py
@@ -214,12 +214,14 @@ class CFGDenoiser(torch.nn.Module):
 
         if shared.opts.skip_early_cond != 0. and self.step / self.total_steps <= shared.opts.skip_early_cond:
             skip_uncond = True
-            x_in = x_in[:-batch_size]
-            sigma_in = sigma_in[:-batch_size]
-
-        # alternating uncond allows for higher thresholds without the quality loss normally expected from raising it
-        if (self.step % 2 or shared.opts.s_min_uncond_all) and s_min_uncond > 0 and sigma[0] < s_min_uncond and not is_edit_model:
+            self.p.extra_generation_params["Skip Early CFG"] = shared.opts.skip_early_cond
+        elif (self.step % 2 or shared.opts.s_min_uncond_all) and s_min_uncond > 0 and sigma[0] < s_min_uncond and not is_edit_model:
             skip_uncond = True
+            self.p.extra_generation_params["NGMS"] = s_min_uncond
+            if shared.opts.s_min_uncond_all:
+                self.p.extra_generation_params["NGMS all steps"] = shared.opts.s_min_uncond_all
+
+        if skip_uncond:
             x_in = x_in[:-batch_size]
             sigma_in = sigma_in[:-batch_size]
 
diff --git a/modules/shared_options.py b/modules/shared_options.py
index c711fa5f6..05c3d9391 100644
--- a/modules/shared_options.py
+++ b/modules/shared_options.py
@@ -209,8 +209,8 @@ options_templates.update(options_section(('img2img', "img2img", "sd"), {
 
 options_templates.update(options_section(('optimizations', "Optimizations", "sd"), {
     "cross_attention_optimization": OptionInfo("Automatic", "Cross attention optimization", gr.Dropdown, lambda: {"choices": shared_items.cross_attention_optimizations()}),
-    "s_min_uncond": OptionInfo(0.0, "Negative Guidance minimum sigma", gr.Slider, {"minimum": 0.0, "maximum": 15.0, "step": 0.01}).link("PR", "https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/9177").info("skip negative prompt for some steps when the image is almost ready; 0=disable, higher=faster"),
-    "s_min_uncond_all": OptionInfo(False, "NGMS: Skip every step").info("makes Negative Guidance minimum sigma skip negative guidance on every step instead of only half"),
+    "s_min_uncond": OptionInfo(0.0, "Negative Guidance minimum sigma", gr.Slider, {"minimum": 0.0, "maximum": 15.0, "step": 0.01}, infotext='NGMS').link("PR", "https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/9177").info("skip negative prompt for some steps when the image is almost ready; 0=disable, higher=faster"),
+    "s_min_uncond_all": OptionInfo(False, "Negative Guidance minimum sigma all steps", infotext='NGMS all steps').info("By default, NGMS above skips every other step; this makes it skip all steps"),
     "token_merging_ratio": OptionInfo(0.0, "Token merging ratio", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}, infotext='Token merging ratio').link("PR", "https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/9256").info("0=disable, higher=faster"),
     "token_merging_ratio_img2img": OptionInfo(0.0, "Token merging ratio for img2img", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}).info("only applies if non-zero and overrides above"),
     "token_merging_ratio_hr": OptionInfo(0.0, "Token merging ratio for high-res pass", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}, infotext='Token merging ratio hr').info("only applies if non-zero and overrides above"),
@@ -382,7 +382,7 @@ options_templates.update(options_section(('sampler-params', "Sampler parameters"
     'uni_pc_order': OptionInfo(3, "UniPC order", gr.Slider, {"minimum": 1, "maximum": 50, "step": 1}, infotext='UniPC order').info("must be < sampling steps"),
     'uni_pc_lower_order_final': OptionInfo(True, "UniPC lower order final", infotext='UniPC lower order final'),
     'sd_noise_schedule': OptionInfo("Default", "Noise schedule for sampling", gr.Radio, {"choices": ["Default", "Zero Terminal SNR"]}, infotext="Noise Schedule").info("for use with zero terminal SNR trained models"),
-    'skip_early_cond': OptionInfo(0, "Skip CFG during early sampling", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}, infotext="Skip Early CFG").info("CFG will be disabled (set to 1) on early steps, can both improve sample diversity/quality and speed up sampling"),
+    'skip_early_cond': OptionInfo(0.0, "Ignore negative prompt during early sampling", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}, infotext="Skip Early CFG").info("disables CFG on a proportion of steps at the beginning of generation; 0=skip none; 1=skip all; can both improve sample diversity/quality and speed up sampling"),
 }))
 
 options_templates.update(options_section(('postprocessing', "Postprocessing", "postprocessing"), {

From cd9e9e404955df19a72c832d68888db44ab7b382 Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Sat, 8 Jun 2024 10:13:38 +0300
Subject: [PATCH 39/39] remove unneeded tabulation

---
 .../Lora/ui_extra_networks_lora.py            | 23 +++++++++----------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/extensions-builtin/Lora/ui_extra_networks_lora.py b/extensions-builtin/Lora/ui_extra_networks_lora.py
index e35d90c6e..3e34d69dc 100644
--- a/extensions-builtin/Lora/ui_extra_networks_lora.py
+++ b/extensions-builtin/Lora/ui_extra_networks_lora.py
@@ -60,19 +60,18 @@ class ExtraNetworksPageLora(ui_extra_networks.ExtraNetworksPage):
         else:
             sd_version = lora_on_disk.sd_version
 
-        if shared.sd_model is not None:  # still show LoRA in case an error occurs during initial model loading
-            if shared.opts.lora_show_all or not enable_filter:
-                pass
-            elif sd_version == network.SdVersion.Unknown:
-                model_version = network.SdVersion.SDXL if shared.sd_model.is_sdxl else network.SdVersion.SD2 if shared.sd_model.is_sd2 else network.SdVersion.SD1
-                if model_version.name in shared.opts.lora_hide_unknown_for_versions:
-                    return None
-            elif shared.sd_model.is_sdxl and sd_version != network.SdVersion.SDXL:
-                return None
-            elif shared.sd_model.is_sd2 and sd_version != network.SdVersion.SD2:
-                return None
-            elif shared.sd_model.is_sd1 and sd_version != network.SdVersion.SD1:
+        if shared.opts.lora_show_all or not enable_filter or not shared.sd_model:
+            pass
+        elif sd_version == network.SdVersion.Unknown:
+            model_version = network.SdVersion.SDXL if shared.sd_model.is_sdxl else network.SdVersion.SD2 if shared.sd_model.is_sd2 else network.SdVersion.SD1
+            if model_version.name in shared.opts.lora_hide_unknown_for_versions:
                 return None
+        elif shared.sd_model.is_sdxl and sd_version != network.SdVersion.SDXL:
+            return None
+        elif shared.sd_model.is_sd2 and sd_version != network.SdVersion.SD2:
+            return None
+        elif shared.sd_model.is_sd1 and sd_version != network.SdVersion.SD1:
+            return None
 
         return item