From e4d93225f7fe8ad87312453110e4516b764853c7 Mon Sep 17 00:00:00 2001
From: Victor Hall
Date: Mon, 18 Sep 2023 14:53:17 -0400
Subject: [PATCH 1/5] fix trainSD21.json ema param names

---
 trainSD21.json | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/trainSD21.json b/trainSD21.json
index 9764522..01347ef 100644
--- a/trainSD21.json
+++ b/trainSD21.json
@@ -45,9 +45,9 @@
     "min_snr_gamma": null,
     "ema_decay_rate": null,
     "ema_decay_target": null,
-    "ema_decay_interval": null,
-    "ema_decay_device": null,
-    "ema_decay_sample_raw_training": false,
-    "ema_decay_sample_ema_model": false,
-    "ema_decay_resume_model" : null
+    "ema_update_interval": null,
+    "ema_device": null,
+    "ema_sample_raw_training": false,
+    "ema_sample_ema_model": false,
+    "ema_resume_model" : null
 }

From 2f52832209267d904d53b799534549e9f1484aa7 Mon Sep 17 00:00:00 2001
From: Victor Hall
Date: Mon, 18 Sep 2023 14:54:43 -0400
Subject: [PATCH 2/5] fix trainSD21.json and advanced tweaking ema param names

---
 doc/ADVANCED_TWEAKING.md | 2 +-
 trainSD21.json           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/ADVANCED_TWEAKING.md b/doc/ADVANCED_TWEAKING.md
index 894a894..310f06e 100644
--- a/doc/ADVANCED_TWEAKING.md
+++ b/doc/ADVANCED_TWEAKING.md
@@ -230,7 +230,7 @@ In this mode, the EMA model will be saved alongside the regular checkpoint from
 For more information, consult the [research paper](https://arxiv.org/abs/2101.08482) or continue reading the tuning notes below.
 **Parameters:**
 - `--ema_decay_rate`: Determines the EMA decay rate. It defines how much the EMA model is updated from training at each update. Values should be close to 1 but not exceed it. Activating this parameter triggers the EMA decay feature.
-- `--ema_decay_target`: Set the EMA decay target value within the (0,1) range. The `ema_decay_rate` is computed based on the relation: decay_rate to the power of (total_steps/decay_interval) equals decay_target. Enabling this parameter will override `ema_decay_rate` and will enable the EMA feature.
+- `--ema_strength_target`: Set the EMA decay target value within the (0,1) range. The `ema_decay_rate` is computed based on the relation: decay_rate to the power of (total_steps/decay_interval) equals decay_target. Enabling this parameter will override `ema_decay_rate` and will enable the EMA feature.
 - `--ema_update_interval`: Set the interval in steps between EMA updates. The update occurs at each optimizer step. If you use grad_accum, the actual update interval will be multiplied by your grad_accum value.
 - `--ema_device`: Choose between `cpu` and `cuda` for EMA. Opting for 'cpu' takes around 4 seconds per update and uses approximately 3.2GB RAM, while 'cuda' is much faster but requires a similar amount of VRAM.
 - `--ema_sample_raw_training`: Activate to display samples from the trained model, mirroring conventional training. They will not be presented by default with EMA decay enabled.

diff --git a/trainSD21.json b/trainSD21.json
index 01347ef..da3672c 100644
--- a/trainSD21.json
+++ b/trainSD21.json
@@ -44,7 +44,7 @@
     "load_settings_every_epoch": false,
     "min_snr_gamma": null,
     "ema_decay_rate": null,
-    "ema_decay_target": null,
+    "ema_strength_target": null,
     "ema_update_interval": null,
     "ema_device": null,
     "ema_sample_raw_training": false,
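
The relation quoted in the parameter docs above can be inverted to choose a per-update decay rate from a desired overall strength target. The sketch below is a minimal Python illustration of that arithmetic; the helper name and its defaults are invented for this example and do not appear in the patch set.

```python
# Minimal sketch: solve the documented relation
#   decay_rate ** (total_steps / update_interval) == strength_target
# for decay_rate. Helper name and defaults are illustrative, not
# part of this patch set.

def decay_rate_from_strength_target(strength_target: float,
                                    total_steps: int,
                                    update_interval: int = 1) -> float:
    if not 0.0 < strength_target < 1.0:
        raise ValueError("strength_target must lie in (0, 1)")
    num_updates = total_steps / update_interval
    return strength_target ** (1.0 / num_updates)

# Example: a 0.5 strength target over 10,000 steps with an update every
# 50 optimizer steps implies a per-update decay rate of about 0.9965.
print(decay_rate_from_strength_target(0.5, total_steps=10_000, update_interval=50))
```
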
From 29bab698a3ae69065059e5911d09a3846a4b564e Mon Sep 17 00:00:00 2001
From: Victor Hall
Date: Mon, 18 Sep 2023 15:07:39 -0400
Subject: [PATCH 3/5] minor update to ema docs

---
 doc/ADVANCED_TWEAKING.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/ADVANCED_TWEAKING.md b/doc/ADVANCED_TWEAKING.md
index 310f06e..d38886f 100644
--- a/doc/ADVANCED_TWEAKING.md
+++ b/doc/ADVANCED_TWEAKING.md
@@ -230,7 +230,7 @@ In this mode, the EMA model will be saved alongside the regular checkpoint from
 For more information, consult the [research paper](https://arxiv.org/abs/2101.08482) or continue reading the tuning notes below.
 **Parameters:**
 - `--ema_decay_rate`: Determines the EMA decay rate. It defines how much the EMA model is updated from training at each update. Values should be close to 1 but not exceed it. Activating this parameter triggers the EMA decay feature.
-- `--ema_strength_target`: Set the EMA decay target value within the (0,1) range. The `ema_decay_rate` is computed based on the relation: decay_rate to the power of (total_steps/decay_interval) equals decay_target. Enabling this parameter will override `ema_decay_rate` and will enable the EMA feature.
+- `--ema_strength_target`: Set the EMA strength target value within the (0,1) range. The `ema_decay_rate` is computed based on the relation: decay_rate to the power of (total_steps/decay_interval) equals decay_target. Enabling this parameter will override `ema_decay_rate` and will enable the EMA feature. See [ema_strength_target](#ema_strength_target) for more information.
 - `--ema_update_interval`: Set the interval in steps between EMA updates. The update occurs at each optimizer step. If you use grad_accum, the actual update interval will be multiplied by your grad_accum value.
 - `--ema_device`: Choose between `cpu` and `cuda` for EMA. Opting for 'cpu' takes around 4 seconds per update and uses approximately 3.2GB RAM, while 'cuda' is much faster but requires a similar amount of VRAM.
 - `--ema_sample_raw_training`: Activate to display samples from the trained model, mirroring conventional training. They will not be presented by default with EMA decay enabled.

From 303c8312e39d77aa6f9b996ff3cadb89bf289072 Mon Sep 17 00:00:00 2001
From: Victor Hall
Date: Mon, 18 Sep 2023 16:12:51 -0400
Subject: [PATCH 4/5] update ema sample args again

---
 doc/ADVANCED_TWEAKING.md | 2 +-
 train.json               | 8 ++++----
 trainSD21.json           | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/doc/ADVANCED_TWEAKING.md b/doc/ADVANCED_TWEAKING.md
index d38886f..dab9a47 100644
--- a/doc/ADVANCED_TWEAKING.md
+++ b/doc/ADVANCED_TWEAKING.md
@@ -233,7 +233,7 @@ For more information, consult the [research paper](https://arxiv.org/abs/2101.08
 - `--ema_strength_target`: Set the EMA strength target value within the (0,1) range. The `ema_decay_rate` is computed based on the relation: decay_rate to the power of (total_steps/decay_interval) equals decay_target. Enabling this parameter will override `ema_decay_rate` and will enable the EMA feature. See [ema_strength_target](#ema_strength_target) for more information.
 - `--ema_update_interval`: Set the interval in steps between EMA updates. The update occurs at each optimizer step. If you use grad_accum, the actual update interval will be multiplied by your grad_accum value.
 - `--ema_device`: Choose between `cpu` and `cuda` for EMA. Opting for 'cpu' takes around 4 seconds per update and uses approximately 3.2GB RAM, while 'cuda' is much faster but requires a similar amount of VRAM.
-- `--ema_sample_raw_training`: Activate to display samples from the trained model, mirroring conventional training. They will not be presented by default with EMA decay enabled.
+- `--ema_sample_nonema_model`: Activate to display samples from the non-EMA trained model, mirroring conventional training. They will not be presented by default with EMA decay enabled.
 - `--ema_sample_ema_model`: Turn on to exhibit samples from the EMA model. EMA models will be used for sample generation by default with EMA decay enabled, unless disabled.
 - `--ema_resume_model`: Indicate the EMA decay checkpoint to continue from, working like `--resume_ckpt` but will load the EMA model. Using `findlast` will only load the EMA version and not regular training.

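
For context on what these renamed flags sample from: an EMA model is maintained by blending the live training weights into a running average at each update, which is what `ema_decay_rate` controls. The following is a generic PyTorch sketch of that update rule, not the `update_ema` function this series touches in train.py.

```python
import torch

@torch.no_grad()
def ema_update_sketch(model: torch.nn.Module,
                      ema_model: torch.nn.Module,
                      decay: float) -> None:
    # Classic EMA rule: ema <- decay * ema + (1 - decay) * current.
    # A decay close to 1 moves the EMA weights only slightly toward
    # the live training weights at each update.
    for ema_p, p in zip(ema_model.parameters(), model.parameters()):
        ema_p.lerp_(p.to(ema_p.device), 1.0 - decay)
```
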
diff --git a/train.json b/train.json
index f74e100..755ee76 100644
--- a/train.json
+++ b/train.json
@@ -4,7 +4,7 @@
     "clip_grad_norm": null,
     "clip_skip": 0,
     "cond_dropout": 0.04,
-    "data_root": "X:\\my_project_data\\project_abc",
+    "data_root": "/mnt/q/training_samples/ff7r/man",
     "disable_amp": false,
     "disable_textenc_training": false,
     "disable_xformers": false,
@@ -19,7 +19,7 @@
     "lr_decay_steps": 0,
     "lr_scheduler": "constant",
     "lr_warmup_steps": null,
-    "max_epochs": 30,
+    "max_epochs": 1,
     "notebook": false,
     "optimizer_config": "optimizer.json",
     "project_name": "project_abc",
@@ -45,10 +45,10 @@
     "load_settings_every_epoch": false,
     "min_snr_gamma": null,
     "ema_decay_rate": null,
-    "ema_decay_target": null,
+    "ema_strength_target": null,
     "ema_update_interval": null,
     "ema_device": null,
-    "ema_sample_raw_training": false,
+    "ema_sample_nonema_model": false,
     "ema_sample_ema_model": false,
     "ema_resume_model" : null
 }

diff --git a/trainSD21.json b/trainSD21.json
index da3672c..56d8fd1 100644
--- a/trainSD21.json
+++ b/trainSD21.json
@@ -47,7 +47,7 @@
     "ema_strength_target": null,
     "ema_update_interval": null,
     "ema_device": null,
-    "ema_sample_raw_training": false,
+    "ema_sample_nonema_model": false,
     "ema_sample_ema_model": false,
     "ema_resume_model" : null
 }

From 2dff3aa8d1c304b2923744a179ef0d3d7e61e6a2 Mon Sep 17 00:00:00 2001
From: Victor Hall
Date: Mon, 18 Sep 2023 16:13:22 -0400
Subject: [PATCH 5/5] ema update

---
 train.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/train.py b/train.py
index 2fdd944..af9cbc6 100644
--- a/train.py
+++ b/train.py
@@ -369,7 +369,6 @@ def log_args(log_writer, args):
         log_writer.add_text("config", arglog)
 
 def update_ema(model, ema_model, decay, default_device, ema_device):
-
     with torch.no_grad():
         original_model_on_proper_device = model
         need_to_delete_original = False
@@ -970,7 +969,7 @@ def main(args):
 
     models_info = []
 
-    if (args.ema_decay_rate is None) or args.ema_sample_raw_training:
+    if (args.ema_decay_rate is None) or args.ema_sample_nonema_model:
         models_info.append({"is_ema": False, "swap_required": False})
 
     if (args.ema_decay_rate is not None) and args.ema_sample_ema_model:
@@ -1035,8 +1034,6 @@ def main(args):
 
         return os.path.join(f"{log_folder}/ckpts/{prepend}{args.project_name}-ep{epoch:02}-gs{global_step:05}")
 
-
-
     # Pre-train validation to establish a starting point on the loss graph
     if validator:
         validator.do_validation(global_step=0,
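
The @@ -970 hunk above is the heart of the rename: it decides which models produce samples. Restated as a standalone sketch, the two `if` conditions are copied from the diff, while the wrapper function and the EMA-case entry are assumptions, since the diff context ends before the EMA append.

```python
# The two `if` conditions mirror the @@ -970 hunk above; the wrapper
# function is illustrative, and the EMA-case entry is an assumption
# because the diff context ends before it.

def select_models_info(ema_decay_rate,
                       ema_sample_nonema_model: bool,
                       ema_sample_ema_model: bool) -> list:
    models_info = []
    # Sample the regular (non-EMA) model when EMA is disabled, or when
    # non-EMA samples are explicitly requested alongside EMA training.
    if (ema_decay_rate is None) or ema_sample_nonema_model:
        models_info.append({"is_ema": False, "swap_required": False})
    # Sample the EMA model only when EMA training is active and enabled.
    if (ema_decay_rate is not None) and ema_sample_ema_model:
        models_info.append({"is_ema": True})  # exact fields not shown in the diff
    return models_info
```
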
@@ -1065,7 +1062,6 @@ def main(args):
         if args.load_settings_every_epoch:
             load_train_json_from_file(args)
 
-
         plugin_runner.run_on_epoch_start(epoch=epoch,
                                          global_step=global_step,
                                          project_name=args.project_name,
@@ -1087,6 +1083,7 @@ def main(args):
         for step, batch in enumerate(train_dataloader):
             step_start_time = time.time()
 
+
             plugin_runner.run_on_step_start(epoch=epoch,
                                             global_step=global_step,
                                             project_name=args.project_name,
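
A closing note on the renamed `ema_update_interval`: updates fire on optimizer steps, so gradient accumulation stretches the interval as measured in batches, exactly as the doc hunk warns. A tiny sketch of that relationship follows, with illustrative names that are not project code.

```python
# Illustrative helpers; names are not from this patch set.

def should_update_ema(optimizer_step: int, ema_update_interval: int) -> bool:
    # EMA updates fire every `ema_update_interval` optimizer steps.
    return optimizer_step % ema_update_interval == 0

def batches_per_ema_update(ema_update_interval: int, grad_accum: int) -> int:
    # One optimizer step consumes `grad_accum` batches, so the documented
    # "multiplied by your grad_accum value" behavior follows directly.
    return ema_update_interval * grad_accum

assert batches_per_ema_update(50, grad_accum=4) == 200
```
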