diff --git a/optimizer.json b/optimizer.json
index f4b9e8e..2030c86 100644
--- a/optimizer.json
+++ b/optimizer.json
@@ -14,9 +14,9 @@
         "epsilon": "value added to denominator for numerical stability, unused for lion",
         "weight_decay": "weight decay (L2 penalty)",
         "------------------": "-----------------",
-        "freeze_embeddings": "freeze the text embeddings",
-        "freeze_front_n_layers": "freeze the front N layers of the text encoder (you can pass eg -2 to leave only the last 2 layers unfrozen)",
-        "freeze_final_layer_norm": "freeze the final layer norm"
+        "freeze_embeddings": "whether to freeze the text embeddings",
+        "freeze_front_n_layers": "if not null, freeze the front N layers of the text encoder (you can pass eg -2 to leave only the last 2 layers unfrozen)",
+        "freeze_final_layer_norm": "whether to freeze the text encoder's final layer norm"
     },
     "base": {
         "optimizer": "adamw8bit",
@@ -41,6 +41,6 @@
     "text_encoder_freezing": {
         "freeze_embeddings": false,
         "freeze_front_n_layers": null,
-        "freeze_final_layer_norm": true
+        "freeze_final_layer_norm": false
     }
 }
diff --git a/optimizerSD21.json b/optimizerSD21.json
new file mode 100644
index 0000000..e0a698e
--- /dev/null
+++ b/optimizerSD21.json
@@ -0,0 +1,46 @@
+{
+    "doc": {
+        "base": "base optimizer configuration for unet and text encoder",
+        "text_encoder_overrides": "text encoder config overrides",
+        "text_encoder_lr_scale": "if LR not set on text encoder, sets the Lr to a multiple of the Base LR. for example, if base `lr` is 2e-6 and `text_encoder_lr_scale` is 0.5, the text encoder's LR will be set to `1e-6`.",
+        "-----------------": "-----------------",
+        "optimizer": "adamw, adamw8bit, lion",
+        "optimizer_desc": "'adamw' in standard 32bit, 'adamw8bit' is bitsandbytes, 'lion' is lucidrains",
+        "lr": "learning rate, if null will use CLI or main JSON config value",
+        "lr_scheduler": "'constant' or 'cosine'",
+        "lr_warmup_steps": "number of steps to warmup LR to target LR, if null will use CLI or default a value based on max epochs",
+        "lr_decay_steps": "number of steps to decay LR to zero for cosine, if null will use CLI or default a value based on max epochs",
+        "betas": "exponential decay rates for the moment estimates",
+        "epsilon": "value added to denominator for numerical stability, unused for lion",
+        "weight_decay": "weight decay (L2 penalty)",
+        "------------------": "-----------------",
+        "freeze_embeddings": "whether to freeze the text embeddings",
+        "freeze_front_n_layers": "if not null, freeze the front N layers of the text encoder (you can pass eg -2 to leave only the last 2 layers unfrozen)",
+        "freeze_final_layer_norm": "whether to freeze the text encoder's final layer norm"
+    },
+    "base": {
+        "optimizer": "adamw8bit",
+        "lr": 1e-6,
+        "lr_scheduler": "constant",
+        "lr_decay_steps": null,
+        "lr_warmup_steps": null,
+        "betas": [0.9, 0.999],
+        "epsilon": 1e-8,
+        "weight_decay": 0.010
+    },
+    "text_encoder_overrides": {
+        "optimizer": null,
+        "lr": null,
+        "lr_scheduler": null,
+        "lr_decay_steps": null,
+        "lr_warmup_steps": null,
+        "betas": null,
+        "epsilon": null,
+        "weight_decay": null
+    },
+    "text_encoder_freezing": {
+        "freeze_embeddings": true,
+        "freeze_front_n_layers": -6,
+        "freeze_final_layer_norm": false
+    }
+}
diff --git a/trainSD21.json b/trainSD21.json
index 2759f78..7288b57 100644
--- a/trainSD21.json
+++ b/trainSD21.json
@@ -15,16 +15,16 @@
     "logdir": "logs",
     "log_step": 25,
     "lowvram": false,
-    "lr": 0.6e-06,
+    "lr": 2e-06,
     "lr_decay_steps": 0,
     "lr_scheduler": "constant",
     "lr_warmup_steps": null,
     "max_epochs": 30,
     "notebook": false,
-    "optimizer_config": "optimizer.json",
+    "optimizer_config": "optimizerSD21.json",
     "project_name": "project_abc_sd21",
     "resolution": 768,
-    "resume_ckpt": "v2-1_768-nonema-pruned",
+    "resume_ckpt": "stabilityai/stable-diffusion-2-1",
     "sample_prompts": "sample_prompts.txt",
     "sample_steps": 300,
     "save_ckpt_dir": null,