"text_encoder_lr_scale":"if LR not set on text encoder, sets the Lr to a multiple of the Base LR. for example, if base `lr` is 2e-6 and `text_encoder_lr_scale` is 0.5, the text encoder's LR will be set to `1e-6`.",
"-----------------":"-----------------",
"optimizer":"adamw, adamw8bit, lion",
"optimizer_desc":"'adamw' in standard 32bit, 'adamw8bit' is bitsandbytes, 'lion' is lucidrains",
"lr":"learning rate, if null will use CLI or main JSON config value",
"lr_scheduler":"'constant' or 'cosine'",
"lr_warmup_steps":"number of steps to warmup LR to target LR, if null will use CLI or default a value based on max epochs",
"lr_decay_steps":"number of steps to decay LR to zero for cosine, if null will use CLI or default a value based on max epochs",
"betas":"exponential decay rates for the moment estimates",
"epsilon":"value added to denominator for numerical stability, unused for lion",
"weight_decay":"weight decay (L2 penalty)",
"------------------":"-----------------",
"freeze_embeddings":"whether to freeze the text embeddings",
"freeze_front_n_layers":"if not null, freeze the front N layers of the text encoder (you can pass eg -2 to leave only the last 2 layers unfrozen)",
"freeze_final_layer_norm":"whether to freeze the text encoder's final layer norm"