1. New parameters added to train.json and trainSD21.json - disabled by default.
2. Description added to ADVANCED_TWEAKING.md
This commit is contained in:
parent
5b1760fff2
commit
d2d493c911
|
@ -149,11 +149,7 @@ Based on [Nicholas Guttenberg's blog post](https://www.crosslabs.org//blog/diffu
|
|||
|
||||
Test results: https://huggingface.co/panopstor/ff7r-stable-diffusion/blob/main/zero_freq_test_biggs.webp
|
||||
|
||||
Very tentatively, I suggest closer to 0.10 for short term training, and lower values of around 0.02 to 0.03 for longer runs (50k+ steps). Early indications seem to suggest values like 0.10 can cause divergence over time.
|
||||
|
||||
## Zero terminal SNR
|
||||
|
||||
Set `zero_frequency_noise_ratio` to -1.
|
||||
Very tentatively, I suggest closer to 0.10 for short term training, and lower values of around 0.02 to 0.03 for longer runs (50k+ steps). Early indications seem to suggest values like 0.10 can cause divergence over time.
|
||||
|
||||
## Keeping images together (custom batching)
|
||||
|
||||
|
@ -205,3 +201,37 @@ Clips the gradient normals to a maximum value. Default is None (no clipping).
|
|||
|
||||
Default is no gradient normal clipping. There are also other ways to deal with gradient explosion, such as increasing optimizer epsilon.
|
||||
|
||||
## Zero Terminal SNR
|
||||
**Parameter:** `--enable_zero_terminal_snr`
|
||||
**Default:** `False`
|
||||
Set this flag to use the zero terminal SNR noising beta schedule.
|
||||
|
||||
## Dynamic Configuration Loading
|
||||
**Parameter:** `--load_settings_every_epoch`
|
||||
**Default:** `False`
|
||||
Most of the parameters in the `train.json` file CANNOT be modified during training. Activate this to have the `train.json` configuration file reloaded at the start of each epoch. The following parameters can be changed and will be applied after the start of a new epoch:
|
||||
- `--save_every_n_epochs`
|
||||
- `--save_ckpts_from_n_epochs`
|
||||
- `--save_full_precision`
|
||||
- `--save_optimizer`
|
||||
- `--zero_frequency_noise_ratio`
|
||||
- `--min_snr_gamma`
|
||||
- `--clip_skip`
|
||||
|
||||
## Min-SNR-Gamma Parameter
|
||||
**Parameter:** `--min_snr_gamma`
|
||||
**Recommended Values:** 5, 1, 20
|
||||
**Default:** `None`
|
||||
Set a value to enable min-SNR-gamma; it is disabled when left at `None`. For an in-depth understanding, consult this [research paper](https://arxiv.org/abs/2303.09556).
|
||||
|
||||
## EMA Decay Features
|
||||
The Exponential Moving Average (EMA) model is copied from the base model at the start and is updated every interval of steps by a small contribution from training.
|
||||
In this mode, the EMA model will be saved alongside the regular checkpoint from training. The normal training checkpoint can be loaded with `--resume_ckpt`, and the EMA model can be loaded with `--ema_decay_resume_model`.
|
||||
**Parameters:**
|
||||
- `--ema_decay_rate`: Determines the EMA decay rate. It defines how much the EMA model is updated from training at each update. Values should be close to 1 but not exceed it. Activating this parameter triggers the EMA decay feature.
|
||||
- `--ema_decay_target`: Set the EMA decay target value within the (0,1) range. The `ema_decay_rate` is computed from the relation `decay_rate ^ (total_steps / decay_interval) = decay_target`. Enabling this parameter overrides `ema_decay_rate` and enables the EMA decay feature.
|
||||
- `--ema_decay_interval`: Set the interval in steps between EMA decay updates. The update occurs at each `global_steps` modulo `decay_interval`.
|
||||
- `--ema_decay_device`: Choose between `cpu` and `cuda` for EMA decay. Opting for 'cpu' takes around 4 seconds per update and uses approximately 3.2GB RAM, while 'cuda' is much faster but requires a similar amount of VRAM.
|
||||
- `--ema_decay_sample_raw_training`: Activate to display samples from the trained model, mirroring conventional training. They will not be presented by default with EMA decay enabled.
|
||||
- `--ema_decay_sample_ema_model`: Turn on to show samples from the EMA model. EMA models will be used for sample generation by default with EMA decay enabled, unless disabled.
|
||||
- `--ema_decay_resume_model`: Indicate the EMA decay checkpoint to continue from. It works like `--resume_ckpt` but loads the EMA model. Using `findlast` will load only the EMA version and not the regular training checkpoint.
|
12
train.json
12
train.json
|
@ -40,5 +40,15 @@
|
|||
"write_schedule": false,
|
||||
"rated_dataset": false,
|
||||
"rated_dataset_target_dropout_percent": 50,
|
||||
"zero_frequency_noise_ratio": 0.02
|
||||
"zero_frequency_noise_ratio": 0.02,
|
||||
"enable_zero_terminal_snr": false,
|
||||
"load_settings_every_epoch": false,
|
||||
"min_snr_gamma": null,
|
||||
"ema_decay_rate": null,
|
||||
"ema_decay_target": null,
|
||||
"ema_decay_interval": null,
|
||||
"ema_decay_device": null,
|
||||
"ema_decay_sample_raw_training": false,
|
||||
"ema_decay_sample_ema_model": false,
|
||||
"ema_decay_resume_model" : null
|
||||
}
|
||||
|
|
5
train.py
5
train.py
|
@ -1287,6 +1287,8 @@ if __name__ == "__main__":
|
|||
argparser.add_argument("--rated_dataset_target_dropout_percent", type=int, default=50, help="how many images (in percent) should be included in the last epoch (Default 50)")
|
||||
argparser.add_argument("--zero_frequency_noise_ratio", type=float, default=0.02, help="adds zero frequency noise, for improving contrast (def: 0.0) use 0.0 to 0.15")
|
||||
argparser.add_argument("--enable_zero_terminal_snr", action="store_true", default=None, help="Use zero terminal SNR noising beta schedule")
|
||||
argparser.add_argument("--load_settings_every_epoch", action="store_true", default=None, help="Will load 'train.json' at start of every epoch. Disabled by default and enabled when used.")
|
||||
argparser.add_argument("--min_snr_gamma", type=int, default=None, help="min-SNR-gamma parameteris the loss function into individual tasks. Recommended values: 5, 1, 20. Disabled by default and enabled when used. More info: https://arxiv.org/abs/2303.09556")
|
||||
argparser.add_argument("--ema_decay_rate", type=float, default=None, help="EMA decay rate. EMA model will be updated with (1 - ema_decay_rate) from training, and the ema_decay_rate from previous EMA, every interval. Values less than 1 and not so far from 1. Using this parameter will enable the feature.")
|
||||
argparser.add_argument("--ema_decay_target", type=float, default=None, help="EMA decay target value in range (0,1). ema_decay_rate will be calculated from equation: decay_rate^(total_steps/decay_interval)=decay_target. Using this parameter will enable the feature and overide ema_decay_rate.")
|
||||
argparser.add_argument("--ema_decay_interval", type=int, default=500, help="How many steps between every EMA decay update. EMA model will be update on every global_steps modulo decay_interval.")
|
||||
|
@ -1294,8 +1296,7 @@ if __name__ == "__main__":
|
|||
argparser.add_argument("--ema_decay_sample_raw_training", action="store_true", default=False, help="Will show samples from trained model, just like regular training. Can be used with: --ema_decay_sample_ema_model")
|
||||
argparser.add_argument("--ema_decay_sample_ema_model", action="store_true", default=False, help="Will show samples from EMA model. Can be used with: --ema_decay_sample_raw_training")
|
||||
argparser.add_argument("--ema_decay_resume_model", type=str, default=None, help="The EMA decay checkpoint to resume from for EMA decay, either a local .ckpt file, a converted Diffusers format folder, or a Huggingface.co repo id such as stabilityai/stable-diffusion-2-1-ema-decay")
|
||||
argparser.add_argument("--min_snr_gamma", type=int, default=None, help="min-SNR-gamma parameteris the loss function into individual tasks. Recommended values: 5, 1, 20. Disabled by default and enabled when used. More info: https://arxiv.org/abs/2303.09556")
|
||||
argparser.add_argument("--load_settings_every_epoch", action="store_true", default=None, help="Will load 'train.json' at start of every epoch. Disabled by default and enabled when used.")
|
||||
|
||||
|
||||
# load CLI args to overwrite existing config args
|
||||
args = argparser.parse_args(args=argv, namespace=args)
|
||||
|
|
|
@ -39,5 +39,15 @@
|
|||
"write_schedule": false,
|
||||
"rated_dataset": false,
|
||||
"rated_dataset_target_dropout_percent": 50,
|
||||
"zero_frequency_noise_ratio": 0.02
|
||||
"zero_frequency_noise_ratio": 0.02,
|
||||
"enable_zero_terminal_snr": false,
|
||||
"load_settings_every_epoch": false,
|
||||
"min_snr_gamma": null,
|
||||
"ema_decay_rate": null,
|
||||
"ema_decay_target": null,
|
||||
"ema_decay_interval": null,
|
||||
"ema_decay_device": null,
|
||||
"ema_decay_sample_raw_training": false,
|
||||
"ema_decay_sample_ema_model": false,
|
||||
"ema_decay_resume_model" : null
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue