From e4d93225f7fe8ad87312453110e4516b764853c7 Mon Sep 17 00:00:00 2001
From: Victor Hall
Date: Mon, 18 Sep 2023 14:53:17 -0400
Subject: [PATCH 1/5] fix trainSD21.json ema param names

---
 trainSD21.json | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/trainSD21.json b/trainSD21.json
index 9764522..01347ef 100644
--- a/trainSD21.json
+++ b/trainSD21.json
@@ -45,9 +45,9 @@
     "min_snr_gamma": null,
     "ema_decay_rate": null,
     "ema_decay_target": null,
-    "ema_decay_interval": null,
-    "ema_decay_device": null,
-    "ema_decay_sample_raw_training": false,
-    "ema_decay_sample_ema_model": false,
-    "ema_decay_resume_model" : null
+    "ema_update_interval": null,
+    "ema_device": null,
+    "ema_sample_raw_training": false,
+    "ema_sample_ema_model": false,
+    "ema_resume_model" : null
 }

From 2f52832209267d904d53b799534549e9f1484aa7 Mon Sep 17 00:00:00 2001
From: Victor Hall
Date: Mon, 18 Sep 2023 14:54:43 -0400
Subject: [PATCH 2/5] fix trainSD21.json and advanced tweaking ema param names

---
 doc/ADVANCED_TWEAKING.md | 2 +-
 trainSD21.json           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/ADVANCED_TWEAKING.md b/doc/ADVANCED_TWEAKING.md
index 894a894..310f06e 100644
--- a/doc/ADVANCED_TWEAKING.md
+++ b/doc/ADVANCED_TWEAKING.md
@@ -230,7 +230,7 @@ In this mode, the EMA model will be saved alongside the regular checkpoint from
 For more information, consult the [research paper](https://arxiv.org/abs/2101.08482) or continue reading the tuning notes below.
 **Parameters:**
 - `--ema_decay_rate`: Determines the EMA decay rate. It defines how much the EMA model is updated from training at each update. Values should be close to 1 but not exceed it. Activating this parameter triggers the EMA decay feature.
-- `--ema_decay_target`: Set the EMA decay target value within the (0,1) range. The `ema_decay_rate` is computed based on the relation: decay_rate to the power of (total_steps/decay_interval) equals decay_target. Enabling this parameter will override `ema_decay_rate` and will enable the EMA feature.
+- `--ema_strength_target`: Set the EMA decay target value within the (0,1) range. The `ema_decay_rate` is computed based on the relation: decay_rate to the power of (total_steps/decay_interval) equals decay_target. Enabling this parameter will override `ema_decay_rate` and will enable the EMA feature.
 - `--ema_update_interval`: Set the interval in steps between EMA updates. The update occurs at each optimizer step. If you use grad_accum, the actual update interval will be multiplied by your grad_accum value.
 - `--ema_device`: Choose between `cpu` and `cuda` for EMA. Opting for 'cpu' takes around 4 seconds per update and uses approximately 3.2GB RAM, while 'cuda' is much faster but requires a similar amount of VRAM.
 - `--ema_sample_raw_training`: Activate to display samples from the trained model, mirroring conventional training. They will not be presented by default with EMA decay enabled.

diff --git a/trainSD21.json b/trainSD21.json
index 01347ef..da3672c 100644
--- a/trainSD21.json
+++ b/trainSD21.json
@@ -44,7 +44,7 @@
     "load_settings_every_epoch": false,
     "min_snr_gamma": null,
     "ema_decay_rate": null,
-    "ema_decay_target": null,
+    "ema_strength_target": null,
     "ema_update_interval": null,
     "ema_device": null,
     "ema_sample_raw_training": false,
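
The relation quoted in the parameter docs above can be inverted to choose a per-update decay rate from a desired overall strength target. The sketch below is a minimal Python illustration of that arithmetic; the helper name and its defaults are invented for this example and do not appear in the patch set.

```python
# Minimal sketch: solve the documented relation
#   decay_rate ** (total_steps / update_interval) == strength_target
# for decay_rate. Helper name and defaults are illustrative, not
# part of this patch set.

def decay_rate_from_strength_target(strength_target: float,
                                    total_steps: int,
                                    update_interval: int = 1) -> float:
    if not 0.0 < strength_target < 1.0:
        raise ValueError("strength_target must lie in (0, 1)")
    num_updates = total_steps / update_interval
    return strength_target ** (1.0 / num_updates)

# Example: a 0.5 strength target over 10,000 steps with an update every
# 50 optimizer steps implies a per-update decay rate of about 0.9965.
print(decay_rate_from_strength_target(0.5, total_steps=10_000, update_interval=50))
```
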
From 29bab698a3ae69065059e5911d09a3846a4b564e Mon Sep 17 00:00:00 2001
From: Victor Hall
Date: Mon, 18 Sep 2023 15:07:39 -0400
Subject: [PATCH 3/5] minor update to ema docs

---
 doc/ADVANCED_TWEAKING.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/ADVANCED_TWEAKING.md b/doc/ADVANCED_TWEAKING.md
index 310f06e..d38886f 100644
--- a/doc/ADVANCED_TWEAKING.md
+++ b/doc/ADVANCED_TWEAKING.md
@@ -230,7 +230,7 @@ In this mode, the EMA model will be saved alongside the regular checkpoint from
 For more information, consult the [research paper](https://arxiv.org/abs/2101.08482) or continue reading the tuning notes below.
 **Parameters:**
 - `--ema_decay_rate`: Determines the EMA decay rate. It defines how much the EMA model is updated from training at each update. Values should be close to 1 but not exceed it. Activating this parameter triggers the EMA decay feature.
-- `--ema_strength_target`: Set the EMA decay target value within the (0,1) range. The `ema_decay_rate` is computed based on the relation: decay_rate to the power of (total_steps/decay_interval) equals decay_target. Enabling this parameter will override `ema_decay_rate` and will enable the EMA feature.
+- `--ema_strength_target`: Set the EMA strength target value within the (0,1) range. The `ema_decay_rate` is computed based on the relation: decay_rate to the power of (total_steps/decay_interval) equals decay_target. Enabling this parameter will override `ema_decay_rate` and will enable the EMA feature. See [ema_strength_target](#ema_strength_target) for more information.
 - `--ema_update_interval`: Set the interval in steps between EMA updates. The update occurs at each optimizer step. If you use grad_accum, the actual update interval will be multiplied by your grad_accum value.
 - `--ema_device`: Choose between `cpu` and `cuda` for EMA. Opting for 'cpu' takes around 4 seconds per update and uses approximately 3.2GB RAM, while 'cuda' is much faster but requires a similar amount of VRAM.
 - `--ema_sample_raw_training`: Activate to display samples from the trained model, mirroring conventional training. They will not be presented by default with EMA decay enabled.

From 303c8312e39d77aa6f9b996ff3cadb89bf289072 Mon Sep 17 00:00:00 2001
From: Victor Hall
Date: Mon, 18 Sep 2023 16:12:51 -0400
Subject: [PATCH 4/5] update ema sample args again

---
 doc/ADVANCED_TWEAKING.md | 2 +-
 train.json               | 8 ++++----
 trainSD21.json           | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/doc/ADVANCED_TWEAKING.md b/doc/ADVANCED_TWEAKING.md
index d38886f..dab9a47 100644
--- a/doc/ADVANCED_TWEAKING.md
+++ b/doc/ADVANCED_TWEAKING.md
@@ -233,7 +233,7 @@ For more information, consult the [research paper](https://arxiv.org/abs/2101.08
 - `--ema_strength_target`: Set the EMA strength target value within the (0,1) range. The `ema_decay_rate` is computed based on the relation: decay_rate to the power of (total_steps/decay_interval) equals decay_target. Enabling this parameter will override `ema_decay_rate` and will enable the EMA feature. See [ema_strength_target](#ema_strength_target) for more information.
 - `--ema_update_interval`: Set the interval in steps between EMA updates. The update occurs at each optimizer step. If you use grad_accum, the actual update interval will be multiplied by your grad_accum value.
 - `--ema_device`: Choose between `cpu` and `cuda` for EMA. Opting for 'cpu' takes around 4 seconds per update and uses approximately 3.2GB RAM, while 'cuda' is much faster but requires a similar amount of VRAM.
-- `--ema_sample_raw_training`: Activate to display samples from the trained model, mirroring conventional training. They will not be presented by default with EMA decay enabled.
+- `--ema_sample_nonema_model`: Activate to display samples from the non-EMA trained model, mirroring conventional training. They will not be presented by default with EMA decay enabled.
 - `--ema_sample_ema_model`: Turn on to exhibit samples from the EMA model. EMA models will be used for sample generation by default with EMA decay enabled, unless disabled.
 - `--ema_resume_model`: Indicate the EMA decay checkpoint to continue from, working like `--resume_ckpt` but will load the EMA model. Using `findlast` will only load the EMA version and not regular training.

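
For context on what these renamed flags sample from: an EMA model is maintained by blending the live training weights into a running average at each update, which is what `ema_decay_rate` controls. The following is a generic PyTorch sketch of that update rule, not the `update_ema` function this series touches in train.py.

```python
import torch

@torch.no_grad()
def ema_update_sketch(model: torch.nn.Module,
                      ema_model: torch.nn.Module,
                      decay: float) -> None:
    # Classic EMA rule: ema <- decay * ema + (1 - decay) * current.
    # A decay close to 1 moves the EMA weights only slightly toward
    # the live training weights at each update.
    for ema_p, p in zip(ema_model.parameters(), model.parameters()):
        ema_p.lerp_(p.to(ema_p.device), 1.0 - decay)
```
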
diff --git a/train.json b/train.json
index f74e100..755ee76 100644
--- a/train.json
+++ b/train.json
@@ -4,7 +4,7 @@
     "clip_grad_norm": null,
     "clip_skip": 0,
     "cond_dropout": 0.04,
-    "data_root": "X:\\my_project_data\\project_abc",
+    "data_root": "/mnt/q/training_samples/ff7r/man",
     "disable_amp": false,
     "disable_textenc_training": false,
     "disable_xformers": false,
@@ -19,7 +19,7 @@
     "lr_decay_steps": 0,
     "lr_scheduler": "constant",
     "lr_warmup_steps": null,
-    "max_epochs": 30,
+    "max_epochs": 1,
     "notebook": false,
     "optimizer_config": "optimizer.json",
     "project_name": "project_abc",
@@ -45,10 +45,10 @@
     "load_settings_every_epoch": false,
     "min_snr_gamma": null,
     "ema_decay_rate": null,
-    "ema_decay_target": null,
+    "ema_strength_target": null,
     "ema_update_interval": null,
     "ema_device": null,
-    "ema_sample_raw_training": false,
+    "ema_sample_nonema_model": false,
     "ema_sample_ema_model": false,
     "ema_resume_model" : null
 }

diff --git a/trainSD21.json b/trainSD21.json
index da3672c..56d8fd1 100644
--- a/trainSD21.json
+++ b/trainSD21.json
@@ -47,7 +47,7 @@
     "ema_strength_target": null,
     "ema_update_interval": null,
     "ema_device": null,
-    "ema_sample_raw_training": false,
+    "ema_sample_nonema_model": false,
     "ema_sample_ema_model": false,
     "ema_resume_model" : null
 }

From 2dff3aa8d1c304b2923744a179ef0d3d7e61e6a2 Mon Sep 17 00:00:00 2001
From: Victor Hall
Date: Mon, 18 Sep 2023 16:13:22 -0400
Subject: [PATCH 5/5] ema update

---
 train.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/train.py b/train.py
index 2fdd944..af9cbc6 100644
--- a/train.py
+++ b/train.py
@@ -369,7 +369,6 @@ def log_args(log_writer, args):
         log_writer.add_text("config", arglog)
 
 def update_ema(model, ema_model, decay, default_device, ema_device):
-
     with torch.no_grad():
         original_model_on_proper_device = model
         need_to_delete_original = False
@@ -970,7 +969,7 @@ def main(args):
 
     models_info = []
 
-    if (args.ema_decay_rate is None) or args.ema_sample_raw_training:
+    if (args.ema_decay_rate is None) or args.ema_sample_nonema_model:
         models_info.append({"is_ema": False, "swap_required": False})
 
     if (args.ema_decay_rate is not None) and args.ema_sample_ema_model:
@@ -1035,8 +1034,6 @@ def main(args):
 
         return os.path.join(f"{log_folder}/ckpts/{prepend}{args.project_name}-ep{epoch:02}-gs{global_step:05}")
 
-
-
     # Pre-train validation to establish a starting point on the loss graph
     if validator:
         validator.do_validation(global_step=0,
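
The @@ -970 hunk above is the heart of the rename: it decides which models produce samples. Restated as a standalone sketch, the two `if` conditions are copied from the diff, while the wrapper function and the EMA-case entry are assumptions, since the diff context ends before the EMA append.

```python
# The two `if` conditions mirror the @@ -970 hunk above; the wrapper
# function is illustrative, and the EMA-case entry is an assumption
# because the diff context ends before it.

def select_models_info(ema_decay_rate,
                       ema_sample_nonema_model: bool,
                       ema_sample_ema_model: bool) -> list:
    models_info = []
    # Sample the regular (non-EMA) model when EMA is disabled, or when
    # non-EMA samples are explicitly requested alongside EMA training.
    if (ema_decay_rate is None) or ema_sample_nonema_model:
        models_info.append({"is_ema": False, "swap_required": False})
    # Sample the EMA model only when EMA training is active and enabled.
    if (ema_decay_rate is not None) and ema_sample_ema_model:
        models_info.append({"is_ema": True})  # exact fields not shown in the diff
    return models_info
```
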
@@ -1065,7 +1062,6 @@ def main(args):
         if args.load_settings_every_epoch:
             load_train_json_from_file(args)
 
-
         plugin_runner.run_on_epoch_start(epoch=epoch,
                                          global_step=global_step,
                                          project_name=args.project_name,
@@ -1087,6 +1083,7 @@ def main(args):
         for step, batch in enumerate(train_dataloader):
             step_start_time = time.time()
 
+
             plugin_runner.run_on_step_start(epoch=epoch,
                                             global_step=global_step,
                                             project_name=args.project_name,
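
A closing note on the renamed `ema_update_interval`: updates fire on optimizer steps, so gradient accumulation stretches the interval as measured in batches, exactly as the doc hunk warns. A tiny sketch of that relationship follows, with illustrative names that are not project code.

```python
# Illustrative helpers; names are not from this patch set.

def should_update_ema(optimizer_step: int, ema_update_interval: int) -> bool:
    # EMA updates fire every `ema_update_interval` optimizer steps.
    return optimizer_step % ema_update_interval == 0

def batches_per_ema_update(ema_update_interval: int, grad_accum: int) -> int:
    # One optimizer step consumes `grad_accum` batches, so the documented
    # "multiplied by your grad_accum value" behavior follows directly.
    return ema_update_interval * grad_accum

assert batches_per_ema_update(50, grad_accum=4) == 200
```
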