From 85ad289296ea10c553e6a01a1d98f6979635a621 Mon Sep 17 00:00:00 2001
From: Damian Stewart
Date: Sun, 14 May 2023 11:53:00 +0200
Subject: [PATCH] update SD2.1 default training settings

---
 optimizer.json     |  8 ++++----
 optimizerSD21.json | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 trainSD21.json     |  6 +++---
 3 files changed, 53 insertions(+), 7 deletions(-)
 create mode 100644 optimizerSD21.json

diff --git a/optimizer.json b/optimizer.json
index f4b9e8e..2030c86 100644
--- a/optimizer.json
+++ b/optimizer.json
@@ -14,9 +14,9 @@
         "epsilon": "value added to denominator for numerical stability, unused for lion",
         "weight_decay": "weight decay (L2 penalty)",
         "------------------": "-----------------",
-        "freeze_embeddings": "freeze the text embeddings",
-        "freeze_front_n_layers": "freeze the front N layers of the text encoder (you can pass eg -2 to leave only the last 2 layers unfrozen)",
-        "freeze_final_layer_norm": "freeze the final layer norm"
+        "freeze_embeddings": "whether to freeze the text embeddings",
+        "freeze_front_n_layers": "if not null, freeze the front N layers of the text encoder (you can pass eg -2 to leave only the last 2 layers unfrozen)",
+        "freeze_final_layer_norm": "whether to freeze the text encoder's final layer norm"
     },
     "base": {
         "optimizer": "adamw8bit",
@@ -41,6 +41,6 @@
     "text_encoder_freezing": {
         "freeze_embeddings": false,
         "freeze_front_n_layers": null,
-        "freeze_final_layer_norm": true
+        "freeze_final_layer_norm": false
     }
 }
diff --git a/optimizerSD21.json b/optimizerSD21.json
new file mode 100644
index 0000000..e0a698e
--- /dev/null
+++ b/optimizerSD21.json
@@ -0,0 +1,46 @@
+{
+    "doc": {
+        "base": "base optimizer configuration for unet and text encoder",
+        "text_encoder_overrides": "text encoder config overrides",
+        "text_encoder_lr_scale": "if LR not set on text encoder, sets the LR to a multiple of the base LR. For example, if base `lr` is 2e-6 and `text_encoder_lr_scale` is 0.5, the text encoder's LR will be set to `1e-6`.",
+        "-----------------": "-----------------",
+        "optimizer": "adamw, adamw8bit, lion",
+        "optimizer_desc": "'adamw' is standard 32bit, 'adamw8bit' is bitsandbytes, 'lion' is lucidrains",
+        "lr": "learning rate, if null will use CLI or main JSON config value",
+        "lr_scheduler": "'constant' or 'cosine'",
+        "lr_warmup_steps": "number of steps to warmup LR to target LR, if null will use CLI or default a value based on max epochs",
+        "lr_decay_steps": "number of steps to decay LR to zero for cosine, if null will use CLI or default a value based on max epochs",
+        "betas": "exponential decay rates for the moment estimates",
+        "epsilon": "value added to denominator for numerical stability, unused for lion",
+        "weight_decay": "weight decay (L2 penalty)",
+        "------------------": "-----------------",
+        "freeze_embeddings": "whether to freeze the text embeddings",
+        "freeze_front_n_layers": "if not null, freeze the front N layers of the text encoder (you can pass eg -2 to leave only the last 2 layers unfrozen)",
+        "freeze_final_layer_norm": "whether to freeze the text encoder's final layer norm"
+    },
+    "base": {
+        "optimizer": "adamw8bit",
+        "lr": 1e-6,
+        "lr_scheduler": "constant",
+        "lr_decay_steps": null,
+        "lr_warmup_steps": null,
+        "betas": [0.9, 0.999],
+        "epsilon": 1e-8,
+        "weight_decay": 0.010
+    },
+    "text_encoder_overrides": {
+        "optimizer": null,
+        "lr": null,
+        "lr_scheduler": null,
+        "lr_decay_steps": null,
+        "lr_warmup_steps": null,
+        "betas": null,
+        "epsilon": null,
+        "weight_decay": null
+    },
+    "text_encoder_freezing": {
+        "freeze_embeddings": true,
+        "freeze_front_n_layers": -6,
+        "freeze_final_layer_norm": false
+    }
+}
diff --git a/trainSD21.json b/trainSD21.json
index 2759f78..7288b57 100644
--- a/trainSD21.json
+++ b/trainSD21.json
@@ -15,16 +15,16 @@
     "logdir": "logs",
     "log_step": 25,
     "lowvram": false,
-    "lr": 0.6e-06,
+    "lr": 2e-06,
     "lr_decay_steps": 0,
     "lr_scheduler": "constant",
     "lr_warmup_steps": null,
     "max_epochs": 30,
     "notebook": false,
-    "optimizer_config": "optimizer.json",
+    "optimizer_config": "optimizerSD21.json",
     "project_name": "project_abc_sd21",
     "resolution": 768,
-    "resume_ckpt": "v2-1_768-nonema-pruned",
+    "resume_ckpt": "stabilityai/stable-diffusion-2-1",
     "sample_prompts": "sample_prompts.txt",
     "sample_steps": 300,
     "save_ckpt_dir": null,
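
A quick illustration of how the two settings this patch touches fit together (a minimal sketch, not the trainer's actual code): the doc block's example pairs a base `lr` of 2e-6 with a hypothetical `text_encoder_lr_scale` of 0.5, giving the text encoder an LR of 1e-6, and `"freeze_front_n_layers": -6` works like a negative slice index, leaving only the last 6 text encoder layers trainable. The layer count (23) and the scale value below are assumptions made purely for the example.

# Sketch only: illustrates the intent of text_encoder_lr_scale and
# freeze_front_n_layers as documented in optimizerSD21.json; NOT the trainer's code.
import json

config = json.loads("""
{
    "base": {"lr": 2e-6},
    "text_encoder_overrides": {"lr": null},
    "text_encoder_freezing": {"freeze_front_n_layers": -6}
}
""")

text_encoder_lr_scale = 0.5  # hypothetical value, matching the doc block's example

# If no explicit text encoder LR is set, fall back to a scaled base LR.
base_lr = config["base"]["lr"]
te_lr = config["text_encoder_overrides"]["lr"]
if te_lr is None:
    te_lr = base_lr * text_encoder_lr_scale
print(f"text encoder LR: {te_lr:.1e}")  # 1.0e-06

# freeze_front_n_layers acts like a slice index: -6 freezes everything except
# the last 6 transformer layers. 23 layers is an assumed count for illustration.
num_layers = 23
n = config["text_encoder_freezing"]["freeze_front_n_layers"]
layers = list(range(num_layers))
frozen = layers[:n] if n is not None else []
trainable = layers[n:] if n is not None else layers
print(f"frozen: {len(frozen)} layers, trainable: {len(trainable)} layers")  # 17 / 6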